#include "llvm/IR/IntrinsicsPowerPC.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

    "ppc-quadword-atomics",

    cl::desc("disable vector permute decomposition"),

    "disable-auto-paired-vec-st",
    cl::desc("disable automatically generated 32byte paired vector stores"),

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");
  initializeAddrModeMap();

  bool isPPC64 = Subtarget.isPPC64();

  if (!Subtarget.hasSPE()) {
  for (MVT VT : ScalarIntVTs) {
  if (isPPC64 || Subtarget.hasFPCVT()) {
  if (!Subtarget.hasSPE()) {
  if (TM.Options.UnsafeFPMath) {
  if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
  if (TM.Options.UnsafeFPMath) {
  } else if (Subtarget.hasVSX()) {
  if (Subtarget.hasMMA()) {
void PPCTargetLowering::initializeAddrModeMap() {
  if (MaxAlign == MaxMaxAlign)
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 &&
        VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
      MaxAlign = Align(32);
    else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)

  return Alignment.value();
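// Editor's note (summary added for clarity, not in the original source):
// getMaxByValAlign raises MaxAlign to the largest vector alignment found
// inside a by-value aggregate, capped at MaxMaxAlign; the repeated
// `MaxAlign == MaxMaxAlign` checks simply stop the walk once the cap is hit.
// For example, with a 16-byte cap a struct containing a <4 x i32> member
// yields Align(16), while a 256-bit member only yields Align(32) when the
// cap itself is 32 bytes.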
  return Subtarget.hasSPE();

    return "PPCISD::FP_TO_UINT_IN_VSR";
    return "PPCISD::FP_TO_SINT_IN_VSR";
    return "PPCISD::FTSQRT";
    return "PPCISD::FSQRT";
    return "PPCISD::XXSPLTI_SP_TO_DP";
    return "PPCISD::XXSPLTI32DX";
    return "PPCISD::CALL_RM";
    return "PPCISD::CALL_NOP_RM";
    return "PPCISD::CALL_NOTOC_RM";
    return "PPCISD::BCTRL_RM";
    return "PPCISD::BCTRL_LOAD_TOC_RM";
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
    return "PPCISD::ANDI_rec_1_EQ_BIT";
    return "PPCISD::ANDI_rec_1_GT_BIT";
    return "PPCISD::ST_VSR_SCAL_INT";
    return "PPCISD::PADDI_DTPREL";
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
    return "PPCISD::STRICT_FADDRTZ";
    return "PPCISD::STRICT_FCTIDZ";
    return "PPCISD::STRICT_FCTIWZ";
    return "PPCISD::STRICT_FCTIDUZ";
    return "PPCISD::STRICT_FCTIWUZ";
    return "PPCISD::STRICT_FCFID";
    return "PPCISD::STRICT_FCFIDU";
    return "PPCISD::STRICT_FCFIDS";
    return "PPCISD::STRICT_FCFIDUS";
    return CFP->getValueAPF().isZero();
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
    return CFP->getValueAPF().isZero();

  return Op < 0 || Op == Val;
  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; ++i)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; ++i)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
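// Editor's note: isVPKUHUMShuffleMask recognizes vector-pack masks that keep
// one byte of each halfword. Reading the loops above: ShuffleKind 0
// (big-endian, two inputs) expects mask element i to be i*2+1, i.e.
// {1,3,5,...,31}; ShuffleKind 2 (little-endian, two inputs) expects i*2,
// i.e. {0,2,...,30}; and ShuffleKind 1 (one input) expects
// {j, 2+j, ..., 14+j} repeated in both halves, with j selecting the even or
// odd byte per endianness.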
  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; i += 2)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; i += 2)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
  if (ShuffleKind == 0) {
    for (unsigned i = 0; i != 16; i += 4)
  } else if (ShuffleKind == 2) {
    for (unsigned i = 0; i != 16; i += 4)
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
                     unsigned LHSStart, unsigned RHSStart) {
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)
    for (unsigned j = 0; j != UnitSize; ++j) {
                             LHSStart+j+i*UnitSize) ||
                             RHSStart+j+i*UnitSize))
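// Editor's note: isVMerge verifies a vmrgh/vmrgl-style interleave of
// UnitSize-byte units. With UnitSize == 1, LHSStart == 0 and RHSStart == 16,
// the expected byte mask is {0,16, 1,17, 2,18, ..., 7,23}: unit i of the
// result alternates between LHS unit (LHSStart + i) and RHS unit
// (RHSStart + i), and undef (-1) mask entries are accepted.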
    if (ShuffleKind == 1)
    else if (ShuffleKind == 2)
    if (ShuffleKind == 1)
    else if (ShuffleKind == 0)
    if (ShuffleKind == 1)
    else if (ShuffleKind == 2)
    if (ShuffleKind == 1)
    else if (ShuffleKind == 0)
                     unsigned RHSStartValue) {
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
                             i*RHSStartValue+j+IndexOffset) ||
                             i*RHSStartValue+j+IndexOffset+8))
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1)
    else if (ShuffleKind == 2)

    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1)
    else if (ShuffleKind == 0)
  if (i == 16) return -1;
  if (ShiftAmt < i) return -1;

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    for (++i; i != 16; ++i)
  } else if (ShuffleKind == 1) {
    for (++i; i != 16; ++i)
    ShiftAmt = 16 - ShiftAmt;
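// Editor's note: a vsldoi result is the 16-byte window starting ShiftAmt
// bytes into the concatenation of the two source vectors, so a valid mask is
// just 16 consecutive indices modulo 32 (e.g. ShiftAmt == 3 matches
// {3,4,...,18}). The final `ShiftAmt = 16 - ShiftAmt` converts the amount
// for the little-endian encoding, where the operands are swapped.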
  EVT VT = N->getValueType(0);
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  if (N->getMaskElt(0) % EltSize != 0)
  unsigned ElementBase = N->getMaskElt(0);
  if (ElementBase >= 16)

  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
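// Editor's note: the two loops above accept exactly a splat of one
// EltSize-byte element. With EltSize == 4, splatting word 1 of V1 gives the
// byte mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}: the first loop checks that
// the bytes of the first group are consecutive from ElementBase, the second
// that every following group repeats the first (undefs allowed).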
2191 "Unexpected element width.");
2192 assert((StepLen == 1 || StepLen == -1) &&
"Unexpected element width.");
2194 unsigned NumOfElem = 16 /
Width;
2195 unsigned MaskVal[16];
2196 for (
unsigned i = 0;
i < NumOfElem; ++
i) {
2197 MaskVal[0] =
N->getMaskElt(
i *
Width);
2198 if ((StepLen == 1) && (MaskVal[0] %
Width)) {
2200 }
else if ((StepLen == -1) && ((MaskVal[0] + 1) %
Width)) {
2204 for (
unsigned int j = 1;
j <
Width; ++
j) {
2205 MaskVal[
j] =
N->getMaskElt(
i *
Width +
j);
2206 if (MaskVal[
j] != MaskVal[
j-1] + StepLen) {
                               unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;

  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;

  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;

  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;

  if (N->getOperand(1).isUndef()) {
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
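// Editor's note: in the checks above, M0..M3 are the source word indices of
// the four result words (0-3 from the first input, 4-7 from the second).
// XXINSERTW applies when exactly one result word comes from the other vector
// and the remaining three are in identity order; ShiftElts rotates the
// source so the inserted word reaches the fixed source lane, and
// InsertAtByte is the destination byte offset, mirrored between endiannesses.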
                              bool &Swap, bool IsLE) {
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
    ShiftElts = IsLE ? (4 - M0) % 4 : M0;

  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
  if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
    ShiftElts = (8 - M0) % 8;
  } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
    ShiftElts = (4 - M0) % 4;
  if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
  } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
                              bool &Swap, bool IsLE) {
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);

    if (M0 > 1 && M1 < 2) {
    } else if (M0 < 2 && M1 > 1) {
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);

    if (M0 < 2 && M1 > 1) {
    } else if (M0 > 1 && M1 < 2) {
    DM = (M0 << 1) + (M1 & 1);
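// Editor's note: XXPERMDI's 2-bit immediate DM selects one doubleword from
// each source: bit 1 for the first result doubleword, bit 0 for the second.
// With M0 and M1 as the source doublewords computed above, the big-endian
// encoding is DM = (M0 << 1) + (M1 & 1) (e.g. {M0,M1} == {1,2} gives
// DM == 2), while the bit-complemented form produces the swapped
// little-endian encoding.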
  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);

  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))

    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;
      if (!UniquedVals[Multiple-1].getNode())
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (!UniquedVals[Multiple-1].getNode())
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))

  unsigned ValSizeInBytes = EltSize;
    Value = CN->getZExtValue();
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
  if (ValSizeInBytes < ByteSize) return SDValue();
  if (MaskVal == 0) return SDValue();
  if (SignExtend32<5>(MaskVal) == MaskVal)
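// Editor's note: SignExtend32<5>(MaskVal) == MaskVal holds exactly when
// MaskVal fits a signed 5-bit immediate, the -16..15 range accepted by
// vspltisb/vspltish/vspltisw. For instance -16 (0xFFFFFFF0) qualifies while
// +16 does not, which is why larger splats fall through to the shifted and
// rotated sequences built later in LowerBUILD_VECTOR.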
  if (!isa<ConstantSDNode>(N))
  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);

    if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
      if (Memop->getMemoryVT() == MVT::f64) {
    Base = N.getOperand(0);

  if (!isa<ConstantSDNode>(N))
  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
  return isInt<34>(Imm);
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      Base = N.getOperand(0);
  } else if (N.getOpcode() == ISD::OR) {
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
    if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
      Base = N.getOperand(0);

        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
        Base = N.getOperand(0);
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);
      Base = N.getOperand(0);
  } else if (N.getOpcode() == ISD::OR) {
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
          dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = N.getOperand(0);
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
                                 CN->getValueType(0));
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();
  Base = N.getOperand(0);
  Base = N.getOperand(0);

       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);

  Ty *PCRelCand = dyn_cast<Ty>(N);
  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
      isValidPCRelNode<JumpTableSDNode>(N) ||
      isValidPCRelNode<BlockAddressSDNode>(N))
  EVT MemVT = LD->getMemoryVT();
  if (!ST.hasP8Vector())
  if (!ST.hasP9Vector())
    if (UI.getUse().get().getResNo() == 0 &&

    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlign();
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlign();

  if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
    SDValue Val = cast<StoreSDNode>(N)->getValue();
  if (Alignment < Align(4))
      isa<ConstantSDNode>(Offset))
                                  unsigned &HiOpFlags, unsigned &LoOpFlags,
  const bool Is64Bit = Subtarget.isPPC64();

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, SDLoc(CP), GA);
  unsigned MOHiFlag, MOLoFlag;
    return getTOCEntry(DAG, SDLoc(CP), GA);

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, SDLoc(JT), GA);
  unsigned MOHiFlag, MOLoFlag;
    return getTOCEntry(DAG, SDLoc(GA), GA);

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  unsigned MOHiFlag, MOLoFlag;

    return LowerGlobalTLSAddressAIX(Op, DAG);
  return LowerGlobalTLSAddressLinux(Op, DAG);
  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);

  bool is64bit = Subtarget.isPPC64();
  if (!TM.isPositionIndependent())
                       PtrVT, GOTPtr, TGA, TGA);
                       PtrVT, TLSAddr, TGA);

  EVT PtrVT = Op.getValueType();
    return getTOCEntry(DAG, DL, GA);
  unsigned MOHiFlag, MOLoFlag;
    return getTOCEntry(DAG, DL, GA);

  bool IsStrict = Op->isStrictFPOpcode();
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
  EVT LHSVT = LHS.getValueType();
         "SETCC for f128 is already legal under Power9!");
  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  int ShuffV[] = {1, 0, 3, 2};
    if (C->isAllOnes() || C->isZero())
  EVT VT = Op.getValueType();

  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  InChain = OverflowArea.getValue(1);
  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,

  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
  return Op.getOperand(0);
         "Expecting Inline ASM node.");
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    for (; NumVals; --NumVals, ++i) {
      if (Reg != PPC::LR && Reg != PPC::LR8)

  bool isPPC64 = (PtrVT == MVT::i64);
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
  Args.push_back(Entry);
  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
  return DAG.getStore(thirdStore, dl, FR, nextPtr,

static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
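// Editor's note: F1-F13 are the floating-point argument registers of the
// 64-bit ELF and AIX ABIs, which is why this table (and the NumFPRs == 13
// constants used by the calling-convention code below) stops at PPC::F13.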
                                       unsigned PtrByteSize) {
  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

                                    unsigned PtrByteSize) {
  Align Alignment(PtrByteSize);
    Alignment = Align(16);

  if (BVAlign > PtrByteSize) {
    if (BVAlign.value() % PtrByteSize != 0)
             "ByVal alignment is not a multiple of the pointer size");
    Alignment = BVAlign;

                                 unsigned PtrByteSize, unsigned LinkageSize,
                                 unsigned ParamAreaSize, unsigned &ArgOffset,
                                 unsigned &AvailableFPRs,
                                 unsigned &AvailableVRs) {
  bool UseMemory = false;
    ArgOffset = alignTo(ArgOffset, Alignment);
    if (ArgOffset >= LinkageSize + ParamAreaSize)
  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  if (ArgOffset > LinkageSize + ParamAreaSize)
    if (AvailableFPRs > 0) {
    if (AvailableVRs > 0) {
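// Editor's note (worked example added by the editor, assuming ELFv2's
// 32-byte linkage area and PtrByteSize == 8): the parameter save area holds
// one 8-byte slot per GPR-class argument after the linkage area, so
// ParamAreaSize is 8 * 8 == 64 and the ninth integer argument lands at
// ArgOffset == 32 + 8*8 == 96 == LinkageSize + ParamAreaSize. It therefore
// fails the register checks above and the slot is reported as memory use.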
                                  unsigned NumBytes) {

SDValue PPCTargetLowering::LowerFormalArguments(
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,

SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
  const Align PtrAlign(4);
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  CCInfo.PreAnalyzeFormalArguments(Ins);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
          RC = &PPC::GPRCRegClass;
          RC = &PPC::VSSRCRegClass;
        else if (Subtarget.hasSPE())
          RC = &PPC::GPRCRegClass;
          RC = &PPC::F4RCRegClass;
          RC = &PPC::VSFRCRegClass;
        else if (Subtarget.hasSPE())
          RC = &PPC::GPRCRegClass;
          RC = &PPC::F8RCRegClass;
          RC = &PPC::VRRCRegClass;
          RC = &PPC::VRRCRegClass;
          RC = &PPC::VRRCRegClass;
          assert(i + 1 < e && "No second half of double precision argument");
      InVals.push_back(ArgValue);
        ArgOffset += ArgSize - ObjSize;

  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
        PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                         CCInfo.getNextStackOffset(), true));

    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
      MemOps.push_back(Store);
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
      MemOps.push_back(Store);

  if (!MemOps.empty())

                                      const SDLoc &dl) const {
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
         "fastcc not supported on varargs functions");
  unsigned PtrByteSize = 8;
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
                                PtrByteSize, LinkageSize, ParamAreaSize,
                                NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ArgSize = ObjSize;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    unsigned CurArgOffset;
    auto ComputeArgOffset = [&]() {
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);

      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        InVals.push_back(FIN);
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
      if (ObjSize < PtrByteSize) {
        if (!isLittleEndian) {
        InVals.push_back(Arg);
        if (GPR_idx != Num_GPR_Regs) {
          MemOps.push_back(Store);
        ArgOffset += PtrByteSize;
      InVals.push_back(FIN);
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
          MemOps.push_back(Store);
      ArgOffset += ArgSize;

          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      if (GPR_idx != Num_GPR_Regs) {
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
        ArgSize = PtrByteSize;
      if (FPR_idx != Num_FPR_Regs) {
                        ? &PPC::VSSRCRegClass
                        : &PPC::F4RCRegClass);
                        ? &PPC::VSFRCRegClass
                        : &PPC::F8RCRegClass);
        if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
      ArgOffset += ArgSize;
        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      if (VR_idx != Num_VR_Regs) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      InVals.push_back(ArgVal);

  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
    MinReservedArea = LinkageSize;

    int Depth = ArgOffset;
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      MemOps.push_back(Store);

  if (!MemOps.empty())
                                     unsigned ParamSize) {
  if (!isTailCall) return 0;
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  if (SPDiff < FI->getTailCallSPDelta())
         "PC Relative callers do not have a TOC and cannot share a TOC Base");
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))

  const Function *F = dyn_cast<Function>(GV);
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
    F = dyn_cast<Function>(GlobalObj);

  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
  const unsigned PtrByteSize = 8;
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  const unsigned NumFPRs = 13;
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;
    if (Param.Flags.isNest()) continue;
                               LinkageSize, ParamAreaSize, NumBytes,
                               AvailableFPRs, AvailableVRs))

  auto CalleeArgEnd = CB.arg_end();
  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value *CalleeArg = *CalleeArgIter;
    const Value *CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
        isa<UndefValue>(CalleeArg))

  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
  if (DisableSCO && !TailCallOpt) return false;
  if (isVarArg) return false;
  if (Caller.getCallingConv() != CalleeCC &&

PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
  for (unsigned i = 0; i != Ins.size(); i++) {
    if (Flags.isByVal()) return false;

    return G->getGlobal()->hasHiddenVisibility()
        || G->getGlobal()->hasProtectedVisibility();

  if (!C) return nullptr;
  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||
                         (int)C->getZExtValue() >> 2, SDLoc(Op),
struct TailCallArgumentInfo {
  TailCallArgumentInfo() = default;

  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,

                                   int SPDiff, const SDLoc &dl) {
  bool isPPC64 = Subtarget.isPPC64();
  int SlotSize = isPPC64 ? 8 : 4;
  int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
                                                NewRetAddrLoc, true);
  Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,

  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  TailCallArgumentInfo Info;
  Info.FrameIdxOp = FIN;
  TailCallArguments.push_back(Info);
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    LROpOut = getReturnAddrFrameIndex(DAG);

  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,

                         SDValue PtrOff, int SPDiff, unsigned ArgOffset,
                         bool isPPC64,
    MemOpChains.push_back(

                                   const SDLoc &dl, int SPDiff,
                                   unsigned NumBytes, SDValue LROp,
  if (!MemOpChains2.empty())

    return G->getGlobal()->getValueType()->isFunctionTy();
SDValue PPCTargetLowering::LowerCallResult(
  CCRetInfo.AnalyzeCallResult(

  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
    InVals.push_back(Val);
                          bool IsStrictFPCall = false) {
  unsigned RetOpc = 0;
  if (IsStrictFPCall) {

  auto isLocalCallee = [&]() {
           !isa_and_nonnull<GlobalIFunc>(GV);

  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
    return getAIXFuncEntryPointSymbolSDNode(GV);
    const char *SymName = S->getSymbol();
      return getAIXFuncEntryPointSymbolSDNode(F);
    const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
    SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
         "Expected a CALLSEQ_STARTSDNode.");
  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
                  Alignment, MMOFlags);
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
         "Nest parameter is not supported on AIX.");

    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
  const bool IsPPC64 = Subtarget.isPPC64();
  Ops.push_back(Chain);
    Ops.push_back(AddTOC);
  Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
                                  RegsToPass[i].second.getValueType()));
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(Glue);
SDValue PPCTargetLowering::FinishCall(
  if (!CFlags.IsIndirect)
                             dl, CFlags.HasNest, Subtarget);

  if (CFlags.IsTailCall) {
            cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
           isa<ConstantSDNode>(Callee) ||
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "Unexpected call opcode for a tail call.");

  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,

    isTailCall = IsEligibleForTailCallOptimization_64SVR4(
        Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
           isa<GlobalAddressSDNode>(Callee)) &&
           "Callee should be an llvm::Function object.");
             << "\nTCO callee: ");
         "site marked musttail");

  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
    Callee = LowerGlobalAddress(Callee, DAG);
      CallConv, isTailCall, isVarArg, isPatchPoint,
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
SDValue PPCTargetLowering::LowerCall_32SVR4(
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;
  const Align PtrAlign(4);

    CCInfo.PreAnalyzeCallOperands(Outs);
    unsigned NumArgs = Outs.size();
    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      if (Outs[i].IsFixed) {
        errs() << "Call operand #" << i << " has unhandled type "
    CCInfo.clearWasPPCF128();

  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
  unsigned NumBytes = CCByValInfo.getNextStackOffset();
    Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  bool seenFloatArg = false;
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      Chain = CallSeqStart = NewCallSeqStart;
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
        MemOpChains.push_back(

  if (!MemOpChains.empty())
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
                             RegsToPass[i].second, InFlag);
    SDValue Ops[] = { Chain, InFlag };

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
  return NewCallSeqStart;

SDValue PPCTargetLowering::LowerCall_64SVR4(
  unsigned NumOps = Outs.size();
  bool IsSibCall = false;
  unsigned PtrByteSize = 8;
  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13

  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;

  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
    HasParameterArea = false;
    for (unsigned i = 0; i != NumOps; ++i) {
      EVT ArgVT = Outs[i].VT;
      EVT OrigVT = Outs[i].ArgVT;
        if (NumGPRsUsed > NumGPRs)
          HasParameterArea = true;
          if (++NumGPRsUsed <= NumGPRs)
          if (++NumVRsUsed <= NumVRs)
          if (++NumVRsUsed <= NumVRs)
          if (++NumFPRsUsed <= NumFPRs)
        HasParameterArea = true;

      NumBytes = alignTo(NumBytes, Alignement);
  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  unsigned NumBytesActuallyUsed = NumBytes;
  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
    NumBytes = LinkageSize;
  if (CFlags.IsTailCall)
    Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  unsigned ArgOffset = LinkageSize;
  for (unsigned i = 0; i != NumOps; ++i) {
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;
    auto ComputePtrOff = [&]() {
      ArgOffset = alignTo(ArgOffset, Alignment);
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
      if (Size==1 || Size==2 || Size==4) {
        if (GPR_idx != NumGPRs) {
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;

      if (GPR_idx == NumGPRs && Size < 8) {
        if (!isLittleEndian) {
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
        ArgOffset += PtrByteSize;

      if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,

      if (Size < 8 && GPR_idx != NumGPRs) {
        if (!isLittleEndian) {
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;

      for (unsigned j = 0; j < Size; j += PtrByteSize) {
        if (GPR_idx != NumGPRs) {
          unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

    switch (Arg.getSimpleValueType().SimpleTy) {
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += PtrByteSize;
      ArgOffset += PtrByteSize;
      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
      if (!NeedGPROrStack)
      else if (GPR_idx != NumGPRs && !IsFastCall) {
        } else if (ArgOffset % PtrByteSize != 0) {
          if (!isLittleEndian)
          if (!isLittleEndian)
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (!IsFastCall || NeededLoad) {
        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        for (unsigned i = 0; i < 16; i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);

  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;
  if (!MemOpChains.empty())

  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
  if (isELFv2ABI && !CFlags.IsPatchPoint)
    RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));

  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
                             RegsToPass[i].second, InFlag);
  if (CFlags.IsTailCall && !IsSibCall)

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
6553 "Required alignment greater than stack alignment.");
6573 return RequiredAlign <= 8;
6578 return RequiredAlign <= 4;
6588 const bool IsPPC64 = Subtarget.
isPPC64();
6600 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6602 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6603 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6607 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6608 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6613 "register width are not supported.");
6619 if (ByValSize == 0) {
6626 const unsigned StackSize =
alignTo(ByValSize, PtrAlign);
6628 for (
const unsigned E = Offset + StackSize; Offset <
E;
6629 Offset += PtrAlign.
value()) {
6648 assert(IsPPC64 &&
"PPC32 should have split i64 values.");
6655 LocInfo = ArgFlags.
isSExt() ? CCValAssign::LocInfo::SExt
6656 : CCValAssign::LocInfo::ZExt;
6670 const unsigned Offset =
6677 for (
unsigned I = 0;
I < StoreSize;
I += PtrAlign.
value()) {
6678 if (
unsigned Reg = State.
AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6679 assert(FReg &&
"An FPR should be available when a GPR is reserved.");
6730 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6736 while (NextRegIndex != GPRs.
size() &&
6741 assert(
Reg &&
"Allocating register unexpectedly failed.");
6754 for (
unsigned I = 0;
I !=
VecSize;
I += PtrSize)
6766 if (NextRegIndex == GPRs.
size()) {
6775 if (GPRs[NextRegIndex] == PPC::R9) {
6780 const unsigned FirstReg = State.
AllocateReg(PPC::R9);
6781 const unsigned SecondReg = State.
AllocateReg(PPC::R10);
6782 assert(FirstReg && SecondReg &&
6783 "Allocating R9 or R10 unexpectedly failed.");
6797 for (
unsigned I = 0;
I !=
VecSize;
I += PtrSize) {
6799 assert(
Reg &&
"Failed to allocated register for vararg vector argument");
6815 "i64 should have been split for 32-bit codegen.");
6823 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6825 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6827 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6835 return &PPC::VRRCRegClass;
6860 "Reg must be a valid argument register!");
6861 return LASize + 4 * (
Reg - PPC::R3);
6866 "Reg must be a valid argument register!");
6867 return LASize + 8 * (
Reg - PPC::X3);
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
         "Unexpected calling convention!");
  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  for (size_t I = 0, End = ArgLocs.size(); I != End;) {
    auto HandleMemLoc = [&]() {
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      const bool IsImmutable =
      InVals.push_back(ArgValue);

      assert(isVarArg && "Only use custom memloc for vararg.");
      const unsigned OriginalValNo = VA.getValNo();
      (void)OriginalValNo;
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Missing custom RegLoc.");
               "Unexpected Val type for custom RegLoc.");
               "ValNo mismatch between custom MemLoc and RegLoc.");
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();

      const unsigned Size =
      InVals.push_back(FIN);
      InVals.push_back(FIN);
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
            CopyFrom.getValue(1), dl, CopyFrom,
        MemOps.push_back(Store);
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
               "RegLocs should be for ByVal argument.");

      if (Offset != StackSize) {
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
      InVals.push_back(ArgValue);

  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  unsigned CallerReservedArea =
      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
    CallerReservedArea =

                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};
    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
    for (unsigned GPRIndex =
             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
      MemOps.push_back(Store);

  if (!MemOps.empty())
SDValue PPCTargetLowering::LowerCall_AIX(
         "Unexpected calling convention!");
  if (CFlags.IsPatchPoint)
  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
                                     CCInfo.getNextStackOffset());

  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();

      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
      unsigned LoadOffset = 0;
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));

      if (LoadOffset == ByValSize)
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            CallSeqStart, MemcpyFlags, DAG, dl);

      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        MemOpChains.push_back(Load.getValue(1));
               "Unexpected load emitted during handling of pass-by-value "
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));

      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));

      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      unsigned LoadOffset = 0;
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();

      MemOpChains.push_back(
             "Unexpected register handling for calling convention.");
             "Custom register handling only expected for VarArg.");
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
      else if (Arg.getValueType().getFixedSizeInBits() <
        RegsToPass.push_back(std::make_pair(
             "Unexpected custom register for argument!");
        RegsToPass.push_back(std::make_pair(
      RegsToPass.push_back(std::make_pair(

  if (!MemOpChains.empty())
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const unsigned TOCSaveOffset =
  for (auto Reg : RegsToPass) {
  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
  return CCInfo.CheckReturn(
  CCInfo.AnalyzeReturn(Outs,
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
  RetOps.push_back(Flag);

PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
  EVT IntVT = Op.getValueType();
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[2] = {Chain, FPSIdx};
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  bool isPPC64 = Subtarget.isPPC64();

PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  bool isPPC64 = Subtarget.isPPC64();
                     Op.getOperand(0), Op.getOperand(1));
                     Op.getOperand(0), Op.getOperand(1));

  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);
         "Custom lowering only for i1 loads");
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);
         "Custom lowering only for i1 stores");
7802 "Custom lowering only for i1 results");
7830 EVT TrgVT =
Op.getValueType();
7842 if (SrcSize > 256 ||
7854 if (SrcSize == 256) {
7865 Op1 = SrcSize == 128 ? N1 :
widenVec(DAG, N1,
DL);
7873 for (
unsigned i = 0;
i < TrgNumElts; ++
i)
7874 ShuffV.push_back(
i * SizeMult);
7876 for (
unsigned i = 1;
i <= TrgNumElts; ++
i)
7877 ShuffV.push_back(
i * SizeMult - 1);
7880 for (
unsigned i = TrgNumElts;
i < WideNumElts; ++
i)
7882 ShuffV.push_back(WideNumElts + 1);
7893 EVT ResVT =
Op.getValueType();
7894 EVT CmpVT =
Op.getOperand(0).getValueType();
7896 SDValue TV =
Op.getOperand(2), FV =
Op.getOperand(3);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  assert(Src.getValueType().isFloatingPoint());
  if (Src.getValueType() == MVT::f32) {
    Chain = Src.getValue(1);
  switch (Op.getSimpleValueType().SimpleTy) {
           "i64 FP_TO_UINT is supported only with FPCVT");
                        {Chain, Src}, Flags);

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
                  (IsSigned || Subtarget.hasFPCVT());
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  if (Op.getValueType() == MVT::i32 && !i32Stack) {

                                          const SDLoc &dl) const {
  if (Op->isStrictFPOpcode())

                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();
                      {Op.getOperand(0), Lo, Hi}, Flags);
                      {Res.getValue(1), Res}, Flags);
  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
                      {Chain, Src, FltOfs}, Flags);
                      {Chain, Val}, Flags);
                    dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);

    return LowerFP_TO_INTDirectMove(Op, DAG, dl);
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
  if (Op->isStrictFPOpcode())
                          Op.getOperand(0).getValueType())) {
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
  if (LD->getMemoryVT() != MemVT)

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
           "Non-pre-inc AM on PPC?");
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
  SDLoc dl(NewResChain);
         "A new TF really is required here");

bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(0).getNode();
    if (UI.getUse().get().getResNo() != 0)

  if (Op->isStrictFPOpcode()) {
    Chain = Op.getOperand(0);
  return DAG.getNode(ConvOpc, dl, ConvTy, Src);
                                                  const SDLoc &dl) const {
         "Invalid floating point type as target of conversion");
         "Int to FP conversions with direct moves require FPCVT");
  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;

  for (unsigned i = 1; i < NumConcat; ++i)
                                                 const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
         "Unexpected conversion type");
         "Supports conversions to v2f64/v4f32 only.");

  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  for (int i = 0; i < SaveElts; i++)
    ShuffV[i * Stride] = i;
  for (int i = 1; i <= SaveElts; i++)
    ShuffV[i * Stride - 1] = i - 1;

  Arrange = DAG.getBitcast(IntermediateVT, Arrange);
  EVT ExtVT = Src.getValueType();
                      {Op.getOperand(0), Extend}, Flags);
  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
    return LowerINT_TO_FPVector(Op, DAG, dl);

  if (Src.getValueType() == MVT::i1) {
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
                            RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
                            RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
             "Expected an i32 store");
      RLI.Alignment = Align(4);
                            RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Chain = Bits.getValue(1);
      Chain = FP.getValue(1);
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
         "Unhandled INT_TO_FP type in custom expander!");

    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
             "Expected an i32 store");
      RLI.Alignment = Align(4);
                          RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
         "i32->FP without LFIWAX supported only on PPC64");
        Chain, dl, Ext64, FIdx,
    Chain = FP.getValue(1);
                     {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
  EVT VT = Op.getValueType();
  Chain = MFFS.getValue(1);
         "Stack slot adjustment is valid only on big endian subtargets!");

  EVT VT = Op.getValueType();
         VT == Op.getOperand(1).getValueType() &&
  SDValue OutOps[] = { OutLo, OutHi };

  EVT VT = Op.getValueType();
         VT == Op.getOperand(1).getValueType() &&
  SDValue OutOps[] = { OutLo, OutHi };

  EVT VT = Op.getValueType();
         VT == Op.getOperand(1).getValueType() &&
  SDValue OutOps[] = { OutLo, OutHi };

  EVT VT = Op.getValueType();
  EVT AmtVT = Z.getValueType();
  static const MVT VTys[] = {
  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
  EVT CanonicalVT = VTys[SplatSize-1];
  for (unsigned i = 0; i != 16; ++i)

  bool IsSplat = true;
  bool IsLoad = false;
  return !(IsSplat && IsLoad);

  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  ArgAPFloat = APFloatToConvert;

  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  return (!LosesInfo && !APFloatToConvert.isDenormal());
  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
  EVT Ty = Op->getValueType(0);

  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool BVNIsConstantSplat =
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
  if (!BVNIsConstantSplat || SplatBitSize > 32) {
    const SDValue *InputLoad = &Op.getOperand(0);
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
      unsigned ElementSize =
      assert(((ElementSize == 2 * MemorySize)
             "Unmatched element size and opcode!\n");
      unsigned NumUsesOfInputLD = 128 / ElementSize;
        if (BVInOp.isUndef())
      if (NumUsesOfInputLD == 1 &&
          Subtarget.isISA3_1() && ElementSize <= 16)
      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
                                  LD->getMemoryVT(), LD->getMemOperand());

  unsigned SplatSize = SplatBitSize / 8;
  if (SplatBits == 0) {
                             Op.getValueType(), DAG, dl);

  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
  if (SextVal >= -16 && SextVal <= 15)
  if (SextVal >= -32 && SextVal <= 31) {
    if (VT == Op.getValueType())
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

    int i = SplatCsts[idx];
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      static const unsigned IIDs[] = {
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };

    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      static const unsigned IIDs[] = {
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };

    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      static const unsigned IIDs[] = {
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };

    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
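// Editor's note: the cases above synthesize splat constants that do not fit
// a 5-bit immediate from one that does. Worked example (added by the
// editor): a v8i16 splat of 0x8000 has SextVal == -32768 ==
// (int)((unsigned)-2 << 14), and (-2 & 15) == 14, so it can be built as
// vspltish(-2) followed by vslh of the vector by itself (every lane shifts
// left by 14). The vsr*/vrl* cases are the logical-shift and rotate
// analogues, and the final three cases vsldoi a splat against itself so each
// element drags in its neighbor's sign bytes, giving values of the form
// (i << 8) | 0xFF for negative i.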
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  if (LHSID == (1*9+2)*9+3) return LHS;
  assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");

    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
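// Editor's note: PFEntry is an entry of the generated perfect-shuffle table.
// Bits [29:26] hold the operation, and two 13-bit fields encode the operand
// shuffles, each packing four element indices in base 9 (digits 0-7 select
// an element of the two concatenated inputs, 8 means undef). Thus
// (1*9+2)*9+3 == 102 is the identity mask <0,1,2,3> (the leading 0 digit
// vanishes) and ((4*9+5)*9+6)*9+7 is <4,5,6,7>, the two masks OP_COPY must
// see above.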
  const unsigned BytesInVector = 16;
  unsigned ShiftElts = 0, InsertAtByte = 0;
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  bool FoundCandidate = false;
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)

    bool OtherElementsInOrder = true;
    for (unsigned j = 0; j < BytesInVector; ++j) {
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;

    if (OtherElementsInOrder) {
      ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                       : BigEndianShifts[CurrentElement & 0xF];
      Swap = CurrentElement < BytesInVector;
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;

  if (!FoundCandidate)
9648 const unsigned NumHalfWords = 8;
9649 const unsigned BytesInVector = NumHalfWords * 2;
9658 unsigned ShiftElts = 0, InsertAtByte = 0;
9662 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9663 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9666 uint32_t OriginalOrderLow = 0x1234567;
9667 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9670 for (
unsigned i = 0;
i < NumHalfWords; ++
i) {
9671 unsigned MaskShift = (NumHalfWords - 1 -
i) * 4;
9688 bool FoundCandidate =
false;
9691 for (
unsigned i = 0;
i < NumHalfWords; ++
i) {
9692 unsigned MaskShift = (NumHalfWords - 1 -
i) * 4;
9694 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9702 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9703 TargetOrder = OriginalOrderLow;
9707 if (MaskOneElt == VINSERTHSrcElem &&
9708 (
Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9709 InsertAtByte = IsLE ? BytesInVector - (
i + 1) * 2 :
i * 2;
9710 FoundCandidate =
true;
9716 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9718 if ((
Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9720 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9721 : BigEndianShifts[MaskOneElt & 0x7];
9722 InsertAtByte = IsLE ? BytesInVector - (
i + 1) * 2 :
i * 2;
9723 Swap = MaskOneElt < NumHalfWords;
9724 FoundCandidate =
true;
9730 if (!FoundCandidate)
9765 auto ShuffleMask = SVN->
getMask();
9777 ShuffleMask = cast<ShuffleVectorSDNode>(
VecShuffle)->getMask();
9786 APInt APSplatValue, APSplatUndef;
9787 unsigned SplatBitSize;
9803 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9804 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9805 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9807 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9808 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9809 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9817 for (; SplatBitSize < 32; SplatBitSize <<= 1)
9818 SplatVal |= (SplatVal << SplatBitSize);
9833 "Only set v1i128 as custom, other type shouldn't reach here!");
9838 if (SHLAmt % 8 == 0) {
9839 std::array<int, 16>
Mask;
9840 std::iota(
Mask.begin(),
Mask.end(), 0);
9870 if (
SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9871 if (!isa<ShuffleVectorSDNode>(NewShuffle))
9874 SVOp = cast<ShuffleVectorSDNode>(
Op);
9875 V1 =
Op.getOperand(0);
9876 V2 =
Op.getOperand(1);
9878 EVT VT =
Op.getValueType();
9881 unsigned ShiftElts, InsertAtByte;
9887 bool IsPermutedLoad =
false;
9889 if (InputLoad && Subtarget.
hasVSX() &&
V2.isUndef() &&
9899 if (IsPermutedLoad) {
9900 assert((isLittleEndian || IsFourByte) &&
9901 "Unexpected size for permuted load on big endian target");
9902 SplatIdx += IsFourByte ? 2 : 1;
9903 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9904 "Splat of a value outside of the loaded memory");
9909 if ((IsFourByte && Subtarget.
hasP9Vector()) || !IsFourByte) {
9912 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9914 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9918 if (
LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
9934 Ops,
LD->getMemoryVT(),
LD->getMemOperand());
9967 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9968 return SplatInsertNode;
9973 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9976 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9980 if (Subtarget.
hasVSX() &&
9993 if (Subtarget.
hasVSX() &&
10026 if (Subtarget.
hasVSX()) {
10047 if (
V2.isUndef()) {
10071 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10092 unsigned PFIndexes[4];
10093 bool isFourElementShuffle =
true;
10094 for (
unsigned i = 0;
i != 4 && isFourElementShuffle;
10096 unsigned EltNo = 8;
10097 for (
unsigned j = 0;
j != 4; ++
j) {
10098 if (PermMask[
i * 4 +
j] < 0)
10101 unsigned ByteSource = PermMask[
i * 4 +
j];
10102 if ((ByteSource & 3) !=
j) {
10103 isFourElementShuffle =
false;
10108 EltNo = ByteSource / 4;
10109 }
else if (EltNo != ByteSource / 4) {
10110 isFourElementShuffle =
false;
10114 PFIndexes[
i] = EltNo;
10122 if (isFourElementShuffle) {
10124 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10125 PFIndexes[2] * 9 + PFIndexes[3];
10128 unsigned Cost = (PFEntry >> 30);
10148 if (
V2.isUndef())
V2 = V1;
10162 unsigned SrcElt = PermMask[
i] < 0 ? 0 : PermMask[
i];
10164 for (
unsigned j = 0;
j != BytesPerElement; ++
j)
10165 if (isLittleEndian)
10166 ResultMask.push_back(DAG.
getConstant(31 - (SrcElt*BytesPerElement +
j),
10169 ResultMask.push_back(DAG.
getConstant(SrcElt*BytesPerElement +
j, dl,
10173 ShufflesHandledWithVPERM++;
10175 LLVM_DEBUG(
dbgs() <<
"Emitting a VPERM for the following shuffle:\n");
10177 LLVM_DEBUG(
dbgs() <<
"With the following permute control vector:\n");
10180 if (isLittleEndian)
10182 V2, V1, VPermMask);
10185 V1,
V2, VPermMask);
10193 unsigned IntrinsicID =
10194 cast<ConstantSDNode>(Intrin.
getOperand(0))->getZExtValue();
10197 switch (IntrinsicID) {
10201 case Intrinsic::ppc_altivec_vcmpbfp_p:
10205 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10209 case Intrinsic::ppc_altivec_vcmpequb_p:
10213 case Intrinsic::ppc_altivec_vcmpequh_p:
10217 case Intrinsic::ppc_altivec_vcmpequw_p:
10221 case Intrinsic::ppc_altivec_vcmpequd_p:
10228 case Intrinsic::ppc_altivec_vcmpneb_p:
10229 case Intrinsic::ppc_altivec_vcmpneh_p:
10230 case Intrinsic::ppc_altivec_vcmpnew_p:
10231 case Intrinsic::ppc_altivec_vcmpnezb_p:
10232 case Intrinsic::ppc_altivec_vcmpnezh_p:
10233 case Intrinsic::ppc_altivec_vcmpnezw_p:
10235 switch (IntrinsicID) {
10238 case Intrinsic::ppc_altivec_vcmpneb_p:
10241 case Intrinsic::ppc_altivec_vcmpneh_p:
10244 case Intrinsic::ppc_altivec_vcmpnew_p:
10247 case Intrinsic::ppc_altivec_vcmpnezb_p:
10250 case Intrinsic::ppc_altivec_vcmpnezh_p:
10253 case Intrinsic::ppc_altivec_vcmpnezw_p:
10261 case Intrinsic::ppc_altivec_vcmpgefp_p:
10265 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10269 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10273 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10277 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10281 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10288 case Intrinsic::ppc_altivec_vcmpgtub_p:
10292 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10296 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10300 case Intrinsic::ppc_altivec_vcmpgtud_p:
10308 case Intrinsic::ppc_altivec_vcmpequq:
10309 case Intrinsic::ppc_altivec_vcmpgtsq:
10310 case Intrinsic::ppc_altivec_vcmpgtuq:
10313 switch (IntrinsicID) {
10316 case Intrinsic::ppc_altivec_vcmpequq:
10319 case Intrinsic::ppc_altivec_vcmpgtsq:
10322 case Intrinsic::ppc_altivec_vcmpgtuq:
10329 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10330 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10331 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10332 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10333 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10334 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10335 if (Subtarget.
hasVSX()) {
10336 switch (IntrinsicID) {
10337 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10340 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10343 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10346 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10349 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10352 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10362 case Intrinsic::ppc_altivec_vcmpbfp:
10365 case Intrinsic::ppc_altivec_vcmpeqfp:
10368 case Intrinsic::ppc_altivec_vcmpequb:
10371 case Intrinsic::ppc_altivec_vcmpequh:
10374 case Intrinsic::ppc_altivec_vcmpequw:
10377 case Intrinsic::ppc_altivec_vcmpequd:
10383 case Intrinsic::ppc_altivec_vcmpneb:
10384 case Intrinsic::ppc_altivec_vcmpneh:
10385 case Intrinsic::ppc_altivec_vcmpnew:
10386 case Intrinsic::ppc_altivec_vcmpnezb:
10387 case Intrinsic::ppc_altivec_vcmpnezh:
10388 case Intrinsic::ppc_altivec_vcmpnezw:
10390 switch (IntrinsicID) {
10393 case Intrinsic::ppc_altivec_vcmpneb:
10396 case Intrinsic::ppc_altivec_vcmpneh:
10399 case Intrinsic::ppc_altivec_vcmpnew:
10402 case Intrinsic::ppc_altivec_vcmpnezb:
10405 case Intrinsic::ppc_altivec_vcmpnezh:
10408 case Intrinsic::ppc_altivec_vcmpnezw:
10415 case Intrinsic::ppc_altivec_vcmpgefp:
10418 case Intrinsic::ppc_altivec_vcmpgtfp:
10421 case Intrinsic::ppc_altivec_vcmpgtsb:
10424 case Intrinsic::ppc_altivec_vcmpgtsh:
10427 case Intrinsic::ppc_altivec_vcmpgtsw:
10430 case Intrinsic::ppc_altivec_vcmpgtsd:
10436 case Intrinsic::ppc_altivec_vcmpgtub:
10439 case Intrinsic::ppc_altivec_vcmpgtuh:
10442 case Intrinsic::ppc_altivec_vcmpgtuw:
10445 case Intrinsic::ppc_altivec_vcmpgtud:
10451 case Intrinsic::ppc_altivec_vcmpequq_p:
10452 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10453 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10456 switch (IntrinsicID) {
10459 case Intrinsic::ppc_altivec_vcmpequq_p:
10462 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10465 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10479 unsigned IntrinsicID =
10480 cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
10484 switch (IntrinsicID) {
10485 case Intrinsic::thread_pointer:
10491 case Intrinsic::ppc_mma_disassemble_acc:
10492 case Intrinsic::ppc_vsx_disassemble_pair: {
10495 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10500 for (
int VecNo = 0; VecNo < NumVecs; VecNo++) {
10506 RetOps.push_back(Extract);
10511 case Intrinsic::ppc_unpack_longdouble: {
10512 auto *Idx = dyn_cast<ConstantSDNode>(
Op.getOperand(2));
10513 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10514 "Argument of long double unpack must be 0 or 1!");
10517 Idx->getValueType(0)));
10520 case Intrinsic::ppc_compare_exp_lt:
10521 case Intrinsic::ppc_compare_exp_gt:
10522 case Intrinsic::ppc_compare_exp_eq:
10523 case Intrinsic::ppc_compare_exp_uo: {
10525 switch (IntrinsicID) {
10526 case Intrinsic::ppc_compare_exp_lt:
10529 case Intrinsic::ppc_compare_exp_gt:
10532 case Intrinsic::ppc_compare_exp_eq:
10535 case Intrinsic::ppc_compare_exp_uo:
10542 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10543 Op.getOperand(1), Op.getOperand(2)),
10545 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10546 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10549 case Intrinsic::ppc_test_data_class_d:
10550 case Intrinsic::ppc_test_data_class_f: {
10551 unsigned CmprOpc = PPC::XSTSTDCDP;
10552 if (IntrinsicID == Intrinsic::ppc_test_data_class_f)
10553 CmprOpc = PPC::XSTSTDCSP;
10557 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10560 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10561 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10564 case Intrinsic::ppc_fnmsub: {
10565 EVT VT =
Op.getOperand(1).getValueType();
10572 Op.getOperand(2),
Op.getOperand(3));
10574 case Intrinsic::ppc_convert_f128_to_ppcf128:
10575 case Intrinsic::ppc_convert_ppcf128_to_f128: {
10576 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10577 ? RTLIB::CONVERT_PPCF128_F128
10578 : RTLIB::CONVERT_F128_PPCF128;
10579 MakeLibCallOptions CallOptions;
10580 std::pair<SDValue, SDValue>
Result =
10581 makeLibCall(DAG, LC,
Op.getValueType(),
Op.getOperand(1), CallOptions,
10585 case Intrinsic::ppc_maxfe:
10586 case Intrinsic::ppc_maxfl:
10587 case Intrinsic::ppc_maxfs:
10588 case Intrinsic::ppc_minfe:
10589 case Intrinsic::ppc_minfl:
10590 case Intrinsic::ppc_minfs: {
10591 EVT VT =
Op.getValueType();
10594 [VT](
const SDUse &
Use) { return Use.getValueType() == VT; }) &&
10595 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10598 if (IntrinsicID == Intrinsic::ppc_minfe ||
10599 IntrinsicID == Intrinsic::ppc_minfl ||
10600 IntrinsicID == Intrinsic::ppc_minfs)
10602 unsigned I =
Op.getNumOperands() - 2, Cnt =
I;
10604 for (--
I; Cnt != 0; --Cnt,
I = (--
I == 0 ? (
Op.getNumOperands() - 1) :
I)) {
10622 Op.getOperand(1),
Op.getOperand(2),
10645 switch (cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue()) {
10648 BitNo = 0; InvertBit =
false;
10651 BitNo = 0; InvertBit =
true;
10654 BitNo = 2; InvertBit =
false;
10657 BitNo = 2; InvertBit =
true;
10679 int ArgStart = isa<ConstantSDNode>(
Op.getOperand(0)) ? 0 : 1;
10681 switch (cast<ConstantSDNode>(
Op.getOperand(ArgStart))->getZExtValue()) {
10682 case Intrinsic::ppc_cfence: {
10683 assert(ArgStart == 1 &&
"llvm.ppc.cfence must carry a chain argument.");
10684 assert(Subtarget.
isPPC64() &&
"Only 64-bit is supported for now.");
10685 SDValue Val =
Op.getOperand(ArgStart + 1);
10715 int VectorIndex = 0;
10728 "Expecting an atomic compare-and-swap here.");
10730 auto *AtomicNode = cast<AtomicSDNode>(
Op.getNode());
10731 EVT MemVT = AtomicNode->getMemoryVT();
10749 for (
int i = 0,
e = AtomicNode->getNumOperands();
i <
e;
i++)
10750 Ops.push_back(AtomicNode->getOperand(
i));
10762 EVT MemVT =
N->getMemoryVT();
10764 "Expect quadword atomic operations");
10766 unsigned Opc =
N->getOpcode();
10775 for (
int I = 1,
E =
N->getNumOperands();
I <
E; ++
I)
10776 Ops.push_back(
N->getOperand(
I));
10778 Ops, MemVT,
N->getMemOperand());
10801 Ops.push_back(ValLo);
10802 Ops.push_back(ValHi);
10803 Ops.push_back(
N->getOperand(1));
10805 N->getMemOperand());
10831 "Should only be called for ISD::INSERT_VECTOR_ELT");
10835 EVT VT =
Op.getValueType();
10853 (isa<LoadSDNode>(
V2))) {
10858 BitcastLoad,
Op.getOperand(2));
10882 unsigned InsertAtElement =
C->getZExtValue();
10883 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10885 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10899 EVT VT =
Op.getValueType();
10908 "Type unsupported without MMA");
10910 "Type unsupported without paired vector support");
10915 for (
unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10923 Loads.push_back(
Load);
10924 LoadChains.push_back(
Load.getValue(1));
10945 EVT StoreVT =
Value.getValueType();
10954 "Type unsupported without MMA");
10956 "Type unsupported without paired vector support");
10959 unsigned NumVecs = 2;
10964 for (
unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10965 unsigned VecNum = Subtarget.
isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10969 DAG.
getStore(StoreChain, dl, Elt, BasePtr,
10975 Stores.push_back(
Store);
11027 for (
unsigned i = 0;
i != 8; ++
i) {
11028 if (isLittleEndian) {
11030 Ops[
i*2+1] = 2*
i+16;
11033 Ops[
i*2+1] = 2*
i+1+16;
11036 if (isLittleEndian)
11046 bool IsStrict =
Op->isStrictFPOpcode();
11047 if (
Op.getOperand(IsStrict ? 1 : 0).getValueType() ==
MVT::f128 &&
11058 "Should only be called for ISD::FP_EXTEND");
11075 "Node should have 2 operands with second one being a constant!");
11081 int Idx = cast<ConstantSDNode>(Op0.
getOperand(1))->getZExtValue();
11087 int DWord = Idx >> 1;
11107 SDValue LoadOps[] = {
LD->getChain(),
LD->getBasePtr()};
11110 LD->getMemoryVT(),
LD->getMemOperand());
11120 SDValue LoadOps[] = {
LD->getChain(),
LD->getBasePtr()};
11123 LD->getMemoryVT(),
LD->getMemOperand());
11134 switch (
Op.getOpcode()) {
11163 return LowerGET_DYNAMIC_AREA_OFFSET(
Op, DAG);
11189 case ISD::FSHL:
return LowerFunnelShift(
Op, DAG);
11190 case ISD::FSHR:
return LowerFunnelShift(
Op, DAG);
11202 return LowerFP_ROUND(
Op, DAG);
11215 return LowerINTRINSIC_VOID(
Op, DAG);
11217 return LowerBSWAP(
Op, DAG);
11219 return LowerATOMIC_CMP_SWAP(
Op, DAG);
11221 return LowerATOMIC_LOAD_STORE(
Op, DAG);
11229 switch (
N->getOpcode()) {
11231 llvm_unreachable(
"Do not know how to custom type legalize this operation!");
11248 if (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue() !=
11249 Intrinsic::loop_decrement)
11253 "Unexpected result type for CTR decrement intrinsic");
11255 N->getValueType(0));
11265 switch (cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue()) {
11266 case Intrinsic::ppc_pack_longdouble:
11268 N->getOperand(2),
N->getOperand(1)));
11270 case Intrinsic::ppc_maxfe:
11271 case Intrinsic::ppc_minfe:
11272 case Intrinsic::ppc_fnmsub:
11273 case Intrinsic::ppc_convert_f128_to_ppcf128:
11283 EVT VT =
N->getValueType(0);
11298 if (
N->getOperand(
N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11302 Results.push_back(LoweredValue);
11303 if (
N->isStrictFPOpcode())
11308 if (!
N->getValueType(0).isVector())
11337 return Builder.CreateCall(Func, {});
11359 if (isa<LoadInst>(Inst) && Subtarget.
isPPC64())
11362 Builder.GetInsertBlock()->getParent()->getParent(),
11363 Intrinsic::ppc_cfence, {Inst->getType()}),
11373 unsigned AtomicSize,
11374 unsigned BinOpcode,
11375 unsigned CmpOpcode,
11376 unsigned CmpPred)
const {
11380 auto LoadMnemonic = PPC::LDARX;
11381 auto StoreMnemonic = PPC::STDCX;
11382 switch (AtomicSize) {
11386 LoadMnemonic = PPC::LBARX;
11387 StoreMnemonic = PPC::STBCX;
11391 LoadMnemonic = PPC::LHARX;
11392 StoreMnemonic = PPC::STHCX;
11396 LoadMnemonic = PPC::LWARX;
11397 StoreMnemonic = PPC::STWCX;
11400 LoadMnemonic = PPC::LDARX;
11401 StoreMnemonic = PPC::STDCX;
11417 CmpOpcode ?
F->CreateMachineBasicBlock(LLVM_BB) :
nullptr;
11419 F->insert(It, loopMBB);
11421 F->insert(It, loop2MBB);
11422 F->insert(It, exitMBB);
11428 Register TmpReg = (!BinOpcode) ? incr :
11430 : &PPC::GPRCRegClass);
11435 BB->addSuccessor(loopMBB);
11461 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11463 BuildMI(
BB, dl,
TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11473 BB->addSuccessor(loop2MBB);
11474 BB->addSuccessor(exitMBB);
11481 BB->addSuccessor(loopMBB);
11482 BB->addSuccessor(exitMBB);
11491 switch(
MI.getOpcode()) {
11495 return TII->isSignExtended(
MI);
11519 case PPC::EXTSB8_32_64:
11520 case PPC::EXTSB8_rec:
11521 case PPC::EXTSB_rec:
11524 case PPC::EXTSH8_32_64:
11525 case PPC::EXTSH8_rec:
11526 case PPC::EXTSH_rec:
11529 case PPC::EXTSWSLI_32_64:
11530 case PPC::EXTSWSLI_32_64_rec:
11531 case PPC::EXTSWSLI_rec:
11532 case PPC::EXTSW_32:
11533 case PPC::EXTSW_32_64:
11534 case PPC::EXTSW_32_64_rec:
11535 case PPC::EXTSW_rec:
11538 case PPC::SRAWI_rec:
11539 case PPC::SRAW_rec:
11548 unsigned BinOpcode,
unsigned CmpOpcode,
unsigned CmpPred)
const {
11561 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11563 BuildMI(*
BB,
MI, dl,
TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11564 .
addReg(
MI.getOperand(3).getReg());
11565 MI.getOperand(3).setReg(ValueReg);
11576 bool is64bit = Subtarget.
isPPC64();
11578 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11589 CmpOpcode ?
F->CreateMachineBasicBlock(LLVM_BB) :
nullptr;
11591 F->insert(It, loopMBB);
11593 F->insert(It, loop2MBB);
11594 F->insert(It, exitMBB);
11600 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11623 BB->addSuccessor(loopMBB);
11645 if (ptrA != ZeroReg) {
11647 BuildMI(
BB, dl,
TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11656 .
addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11659 .
addImm(is8bit ? 28 : 27);
11660 if (!isLittleEndian)
11663 .
addImm(is8bit ? 24 : 16);
11707 unsigned ValueReg = SReg;
11708 unsigned CmpReg = Incr2Reg;
11709 if (CmpOpcode == PPC::CMPW) {
11715 BuildMI(
BB, dl,
TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11717 ValueReg = ValueSReg;
11727 BB->addSuccessor(loop2MBB);
11728 BB->addSuccessor(exitMBB);
11740 BB->addSuccessor(loopMBB);
11741 BB->addSuccessor(exitMBB);
11751 .
addImm(is8bit ? 24 : 16)
11772 Register DstReg =
MI.getOperand(0).getReg();
11780 "Invalid Pointer Size!");
11829 Register BufReg =
MI.getOperand(1).getReg();
11844 BaseReg = Subtarget.
isPPC64() ? PPC::X1 : PPC::R1;
11846 BaseReg = Subtarget.
isPPC64() ? PPC::BP8 : PPC::BP;
11849 TII->get(Subtarget.
isPPC64() ? PPC::STD : PPC::STW))
11872 TII->get(Subtarget.
isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11893 TII->get(PPC::PHI), DstReg)
11897 MI.eraseFromParent();
11912 "Invalid Pointer Size!");
11915 (PVT ==
MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11918 unsigned FP = (PVT ==
MVT::i64) ? PPC::X31 : PPC::R31;
11919 unsigned SP = (PVT ==
MVT::i64) ? PPC::X1 : PPC::R1;
11933 Register BufReg =
MI.getOperand(0).getReg();
11999 MI.eraseFromParent();
12015 "Unexpected stack alignment");
12018 unsigned StackProbeSize = 4096;
12026 return StackProbeSize ? StackProbeSize :
StackAlign;
12038 const bool isPPC64 = Subtarget.
isPPC64();
12070 MF->
insert(MBBIter, TestMBB);
12071 MF->
insert(MBBIter, BlockMBB);
12072 MF->
insert(MBBIter, TailMBB);
12077 Register DstReg =
MI.getOperand(0).getReg();
12078 Register NegSizeReg =
MI.getOperand(1).getReg();
12079 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12090 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12096 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12097 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12099 .
addDef(ActualNegSizeReg)
12101 .
add(
MI.getOperand(2))
12102 .
add(
MI.getOperand(3));
12108 .
addReg(ActualNegSizeReg);
12111 int64_t NegProbeSize = -(int64_t)ProbeSize;
12117 .
addImm(NegProbeSize >> 16);
12121 .
addImm(NegProbeSize & 0xFFFF);
12130 .
addReg(ActualNegSizeReg)
12139 .
addReg(ActualNegSizeReg);
12149 BuildMI(TestMBB,
DL,
TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12163 BuildMI(BlockMBB,
DL,
TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12177 MaxCallFrameSizeReg)
12178 .
add(
MI.getOperand(2))
12179 .
add(
MI.getOperand(3));
12180 BuildMI(TailMBB,
DL,
TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12182 .
addReg(MaxCallFrameSizeReg);
12191 MI.eraseFromParent();
12193 ++NumDynamicAllocaProbed;
12216 if (
MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12217 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12219 }
else if (
MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12220 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12234 if (
MI.getOpcode() == PPC::SELECT_CC_I4 ||
12235 MI.getOpcode() == PPC::SELECT_CC_I8 ||
MI.getOpcode() == PPC::SELECT_I4 ||
12236 MI.getOpcode() == PPC::SELECT_I8) {
12238 if (
MI.getOpcode() == PPC::SELECT_CC_I4 ||
12239 MI.getOpcode() == PPC::SELECT_CC_I8)
12240 Cond.push_back(
MI.getOperand(4));
12243 Cond.push_back(
MI.getOperand(1));
12246 TII->insertSelect(*
BB,
MI, dl,
MI.getOperand(0).getReg(),
Cond,
12247 MI.getOperand(2).getReg(),
MI.getOperand(3).getReg());
12248 }
else if (
MI.getOpcode() == PPC::SELECT_CC_F4 ||
12249 MI.getOpcode() == PPC::SELECT_CC_F8 ||
12250 MI.getOpcode() == PPC::SELECT_CC_F16 ||
12251 MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12252 MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12253 MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12254 MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12255 MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12256 MI.getOpcode() == PPC::SELECT_CC_SPE ||
12257 MI.getOpcode() == PPC::SELECT_F4 ||
12258 MI.getOpcode() == PPC::SELECT_F8 ||
12259 MI.getOpcode() == PPC::SELECT_F16 ||
12260 MI.getOpcode() == PPC::SELECT_SPE ||
12261 MI.getOpcode() == PPC::SELECT_SPE4 ||
12262 MI.getOpcode() == PPC::SELECT_VRRC ||
12263 MI.getOpcode() == PPC::SELECT_VSFRC ||
12264 MI.getOpcode() == PPC::SELECT_VSSRC ||
12265 MI.getOpcode() == PPC::SELECT_VSRC) {
12280 F->insert(It, copy0MBB);
12281 F->insert(It, sinkMBB);
12289 BB->addSuccessor(copy0MBB);
12290 BB->addSuccessor(sinkMBB);
12292 if (
MI.getOpcode() == PPC::SELECT_I4 ||
MI.getOpcode() == PPC::SELECT_I8 ||
12293 MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
12294 MI.getOpcode() == PPC::SELECT_F16 ||
12295 MI.getOpcode() == PPC::SELECT_SPE4 ||
12296 MI.getOpcode() == PPC::SELECT_SPE ||
12297 MI.getOpcode() == PPC::SELECT_VRRC ||
12298 MI.getOpcode() == PPC::SELECT_VSFRC ||
12299 MI.getOpcode() == PPC::SELECT_VSSRC ||
12300 MI.getOpcode() == PPC::SELECT_VSRC) {
12302 .
addReg(
MI.getOperand(1).getReg())
12305 unsigned SelectPred =
MI.getOperand(4).getImm();
12308 .
addReg(
MI.getOperand(1).getReg())
12318 BB->addSuccessor(sinkMBB);
12324 BuildMI(*
BB,
BB->begin(), dl,
TII->get(PPC::PHI),
MI.getOperand(0).getReg())
12325 .
addReg(
MI.getOperand(3).getReg())
12327 .
addReg(
MI.getOperand(2).getReg())
12329 }
else if (
MI.getOpcode() == PPC::ReadTB) {
12345 F->insert(It, readMBB);
12346 F->insert(It, sinkMBB);
12353 BB->addSuccessor(readMBB);
12375 BB->addSuccessor(readMBB);
12376 BB->addSuccessor(sinkMBB);
12377 }
else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12379 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12381 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12383 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12386 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12388 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12390 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12392 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12395 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12397 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12399 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12401 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12404 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12406 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12408 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12410 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12413 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12415 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12417 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12419 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12422 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12424 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12426 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12428 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12431 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12433 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12435 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12437 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12440 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12442 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12444 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12446 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12449 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12451 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12453 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12455 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12458 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12460 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12462 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12464 else if (
MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12467 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12469 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12471 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12473 else if (
MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12475 else if (
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12476 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12478 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12480 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12481 bool is64bit =
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12483 auto LoadMnemonic = PPC::LDARX;
12484 auto StoreMnemonic = PPC::STDCX;
12485 switch (
MI.getOpcode()) {
12488 case PPC::ATOMIC_CMP_SWAP_I8:
12489 LoadMnemonic = PPC::LBARX;
12490 StoreMnemonic = PPC::STBCX;
12493 case PPC::ATOMIC_CMP_SWAP_I16:
12494 LoadMnemonic = PPC::LHARX;
12495 StoreMnemonic = PPC::STHCX;
12498 case PPC::ATOMIC_CMP_SWAP_I32:
12499 LoadMnemonic = PPC::LWARX;
12500 StoreMnemonic = PPC::STWCX;
12502 case PPC::ATOMIC_CMP_SWAP_I64:
12503 LoadMnemonic = PPC::LDARX;
12504 StoreMnemonic = PPC::STDCX;
12510 Register oldval =
MI.getOperand(3).getReg();
12511 Register newval =
MI.getOperand(4).getReg();
12518 F->insert(It, loop1MBB);
12519 F->insert(It, loop2MBB);
12520 F->insert(It, midMBB);
12521 F->insert(It, exitMBB);
12529 BB->addSuccessor(loop1MBB);
12544 BuildMI(
BB, dl,
TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12551 BB->addSuccessor(loop2MBB);
12552 BB->addSuccessor(midMBB);
12564 BB->addSuccessor(loop1MBB);
12565 BB->addSuccessor(exitMBB);
12572 BB->addSuccessor(exitMBB);
12577 }
else if (
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12578 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12582 bool is64bit = Subtarget.
isPPC64();
12584 bool is8bit =
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12589 Register oldval =
MI.getOperand(3).getReg();
12590 Register newval =
MI.getOperand(4).getReg();
12597 F->insert(It, loop1MBB);
12598 F->insert(It, loop2MBB);
12599 F->insert(It, midMBB);
12600 F->insert(It, exitMBB);
12607 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12626 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12630 BB->addSuccessor(loop1MBB);
12659 if (ptrA != ZeroReg) {
12661 BuildMI(
BB, dl,
TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12671 .
addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12674 .
addImm(is8bit ? 28 : 27);
12675 if (!isLittleEndian)
12678 .
addImm(is8bit ? 24 : 16);
12728 BB->addSuccessor(loop2MBB);
12729 BB->addSuccessor(midMBB);
12747 BB->addSuccessor(loop1MBB);
12748 BB->addSuccessor(exitMBB);
12755 BB->addSuccessor(exitMBB);
12763 }
else if (
MI.getOpcode() == PPC::FADDrtz) {
12798 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12799 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12800 unsigned Opcode = (
MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12801 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12805 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12809 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12813 .
addReg(
MI.getOperand(1).getReg())
12816 MI.getOperand(0).getReg())
12817 .
addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12818 }
else if (
MI.getOpcode() == PPC::TCHECK_RET) {
12824 MI.getOperand(0).getReg())
12826 }
else if (
MI.getOpcode() == PPC::TBEGIN_RET) {
12828 unsigned Imm =
MI.getOperand(1).getImm();
12831 MI.getOperand(0).getReg())
12833 }
else if (
MI.getOpcode() == PPC::SETRNDi) {
12835 Register OldFPSCRReg =
MI.getOperand(0).getReg();
12839 BuildMI(*
BB,
MI, dl,
TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12852 unsigned Mode =
MI.getOperand(1).getImm();
12860 }
else if (
MI.getOpcode() == PPC::SETRND) {
12868 auto copyRegFromG8RCOrF8RC = [&] (
unsigned DestReg,
unsigned SrcReg) {
12874 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12877 if (RC == &PPC::F8RCRegClass) {
12880 "Unsupported RegClass.");
12882 StoreOp = PPC::STFD;
12887 (RegInfo.
getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12888 "Unsupported RegClass.");
12921 Register OldFPSCRReg =
MI.getOperand(0).getReg();
12938 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12946 BuildMI(*
BB,
MI, dl,
TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12960 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12969 }
else if (
MI.getOpcode() == PPC::SETFLM) {
12973 Register OldFPSCRReg =
MI.getOperand(0).getReg();
12975 BuildMI(*
BB,
MI, Dl,
TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12980 Register NewFPSCRReg =
MI.getOperand(1).getReg();
12986 }
else if (
MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12987 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12989 }
else if (
MI.getOpcode() == PPC::SPLIT_QUADWORD) {
12996 .
addUse(Src, 0, PPC::sub_gp8_x1);
12999 .
addUse(Src, 0, PPC::sub_gp8_x0);
13000 }
else if (
MI.getOpcode() == PPC::LQX_PSEUDO ||
13001 MI.getOpcode() == PPC::STQX_PSEUDO) {
13007 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13013 MI.getOpcode() == PPC::LQX_PSEUDO ?
TII->get(PPC::LQ)
13014 :
TII->get(PPC::STQ))
13022 MI.eraseFromParent();
13035 int RefinementSteps = Subtarget.
hasRecipPrec() ? 1 : 3;
13038 return RefinementSteps;
13044 EVT VT =
Op.getValueType();
13071 PPCTargetLowering::getSqrtResultForDenormInput(
SDValue Op,
13074 EVT VT =
Op.getValueType();
13083 int Enabled,
int &RefinementSteps,
13084 bool &UseOneConstNR,
13085 bool Reciprocal)
const {
13091 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13104 int &RefinementSteps)
const {
13110 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13117 unsigned PPCTargetLowering::combineRepeatedFPDivisors()
const {
13146 Offset += cast<ConstantSDNode>(Loc.
getOperand(1))->getSExtValue();
13155 unsigned Bytes,
int Dist,
13165 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13166 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13169 if (
FS != BFS ||
FS != (
int)Bytes)
return false;
13173 SDValue Base1 = Loc, Base2 = BaseLoc;
13174 int64_t Offset1 = 0, Offset2 = 0;
13177 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13187 if (isGA1 && isGA2 && GV1 == GV2)
13188 return Offset1 == (Offset2 + Dist*Bytes);
13195 unsigned Bytes,
int Dist,
13198 EVT VT =
LS->getMemoryVT();
13205 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
13206 default:
return false;
13207 case Intrinsic::ppc_altivec_lvx:
13208 case Intrinsic::ppc_altivec_lvxl:
13209 case Intrinsic::ppc_vsx_lxvw4x:
13210 case Intrinsic::ppc_vsx_lxvw4x_be:
13213 case Intrinsic::ppc_vsx_lxvd2x:
13214 case Intrinsic::ppc_vsx_lxvd2x_be:
13217 case Intrinsic::ppc_altivec_lvebx:
13220 case Intrinsic::ppc_altivec_lvehx:
13223 case Intrinsic::ppc_altivec_lvewx:
13233 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
13234 default:
return false;
13235 case Intrinsic::ppc_altivec_stvx:
13236 case Intrinsic::ppc_altivec_stvxl:
13237 case Intrinsic::ppc_vsx_stxvw4x:
13240 case Intrinsic::ppc_vsx_stxvd2x:
13243 case Intrinsic::ppc_vsx_stxvw4x_be:
13246 case Intrinsic::ppc_vsx_stxvd2x_be:
13249 case Intrinsic::ppc_altivec_stvebx:
13252 case Intrinsic::ppc_altivec_stvehx:
13255 case Intrinsic::ppc_altivec_stvewx:
13273 EVT VT =
LD->getMemoryVT();
13282 while (!Queue.empty()) {
13283 SDNode *ChainNext = Queue.pop_back_val();
13284 if (!Visited.
insert(ChainNext).second)
13287 if (
MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13291 if (!Visited.
count(ChainLD->getChain().getNode()))
13292 Queue.push_back(ChainLD->getChain().getNode());
13294 for (
const SDUse &
O : ChainNext->
ops())
13295 if (!Visited.
count(
O.getNode()))
13296 Queue.push_back(
O.getNode());
13298 LoadRoots.
insert(ChainNext);
13311 Queue.push_back(*
I);
13313 while (!Queue.empty()) {
13314 SDNode *LoadRoot = Queue.pop_back_val();
13315 if (!Visited.
insert(LoadRoot).second)
13318 if (
MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13323 if (((isa<MemSDNode>(U) &&
13324 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13327 Queue.push_back(U);
13360 auto Final = Shifted;
13371 DAGCombinerInfo &DCI)
const {
13379 if (!DCI.isAfterLegalizeDAG())
13384 for (
const SDNode *U :
N->uses())
13388 ISD::CondCode CC = cast<CondCodeSDNode>(
N->getOperand(2))->get();
13389 auto OpSize =
N->getOperand(0).getValueSizeInBits();
13393 if (OpSize < Size) {
13411 DAGCombinerInfo &DCI)
const {
13429 if (
N->getOperand(0).getValueType() !=
MVT::i32 &&
13430 N->getOperand(0).getValueType() !=
MVT::i64)
13438 cast<CondCodeSDNode>(
N->getOperand(
13440 unsigned OpBits =
N->getOperand(0).getValueSizeInBits();
13451 return (
N->getOpcode() ==
ISD::SETCC ? ConvertSETCCToSubtract(
N, DCI)
13474 if (
N->getOperand(0).getOpcode() !=
ISD::AND &&
13475 N->getOperand(0).getOpcode() !=
ISD::OR &&
13476 N->getOperand(0).getOpcode() !=
ISD::XOR &&
13486 N->getOperand(1).getOpcode() !=
ISD::AND &&
13487 N->getOperand(1).getOpcode() !=
ISD::OR &&
13488 N->getOperand(1).getOpcode() !=
ISD::XOR &&
13501 for (
unsigned i = 0;
i < 2; ++
i) {
13505 N->getOperand(
i).getOperand(0).getValueType() ==
MVT::i1) ||
13506 isa<ConstantSDNode>(
N->getOperand(
i)))
13507 Inputs.push_back(
N->getOperand(
i));
13509 BinOps.push_back(
N->getOperand(
i));
13517 while (!BinOps.empty()) {
13523 PromOps.push_back(BinOp);
13559 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13560 if (isa<ConstantSDNode>(Inputs[
i]))
13583 for (
unsigned i = 0, ie = PromOps.size();
i != ie; ++
i) {
13605 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13608 if (isa<ConstantSDNode>(Inputs[
i]))
13614 std::list<HandleSDNode> PromOpHandles;
13615 for (
auto &PromOp : PromOps)
13616 PromOpHandles.emplace_back(PromOp);
13623 while (!PromOpHandles.empty()) {
13625 PromOpHandles.pop_back();
13631 if (!isa<ConstantSDNode>(PromOp.
getOperand(0)) &&
13634 PromOpHandles.emplace_front(PromOp);
13639 if (isa<ConstantSDNode>(RepValue))
13648 default:
C = 0;
break;
13653 if ((!isa<ConstantSDNode>(PromOp.
getOperand(
C)) &&
13661 PromOpHandles.emplace_front(PromOp);
13669 for (
unsigned i = 0;
i < 2; ++
i)
13670 if (isa<ConstantSDNode>(Ops[
C+
i]))
13679 return N->getOperand(0);
13687 DAGCombinerInfo &DCI)
const {
13713 if (
N->getOperand(0).getOpcode() !=
ISD::AND &&
13714 N->getOperand(0).getOpcode() !=
ISD::OR &&
13715 N->getOperand(0).getOpcode() !=
ISD::XOR &&
13726 while (!BinOps.empty()) {
13732 PromOps.push_back(BinOp);
13765 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13766 if (isa<ConstantSDNode>(Inputs[
i]))
13777 SelectTruncOp[0].
insert(std::make_pair(
User,
13781 SelectTruncOp[0].
insert(std::make_pair(
User,
13784 SelectTruncOp[1].
insert(std::make_pair(
User,
13790 for (
unsigned i = 0, ie = PromOps.size();
i != ie; ++
i) {
13799 SelectTruncOp[0].
insert(std::make_pair(
User,
13803 SelectTruncOp[0].
insert(std::make_pair(
User,
13806 SelectTruncOp[1].
insert(std::make_pair(
User,
13812 unsigned PromBits =
N->getOperand(0).getValueSizeInBits();
13813 bool ReallyNeedsExt =
false;
13817 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13818 if (isa<ConstantSDNode>(Inputs[
i]))
13822 Inputs[
i].getOperand(0).getValueSizeInBits();
13823 assert(PromBits < OpBits &&
"Truncation not to a smaller bit count?");
13828 OpBits-PromBits))) ||
13831 (OpBits-(PromBits-1)))) {
13832 ReallyNeedsExt =
true;
13840 for (
unsigned i = 0, ie = Inputs.size();
i != ie; ++
i) {
13844 if (isa<ConstantSDNode>(Inputs[
i]))
13847 SDValue InSrc = Inputs[
i].getOperand(0);
13861 std::list<HandleSDNode> PromOpHandles;
13862 for (
auto &PromOp : PromOps)
13863 PromOpHandles.emplace_back(PromOp);
13869 while (!PromOpHandles.empty()) {
13871 PromOpHandles.pop_back();
13875 default:
C = 0;
break;
13880 if ((!isa<ConstantSDNode>(PromOp.
getOperand(
C)) &&
13888 PromOpHandles.emplace_front(PromOp);
13898 (SelectTruncOp[1].count(PromOp.
getNode()) &&
13900 PromOpHandles.emplace_front(PromOp);
13909 for (
unsigned i = 0;
i < 2; ++
i) {
13910 if (!isa<ConstantSDNode>(Ops[
C+
i]))
13927 auto SI0 = SelectTruncOp[0].
find(PromOp.
getNode());
13928 if (SI0 != SelectTruncOp[0].
end())
13930 auto SI1 = SelectTruncOp[1].
find(PromOp.
getNode());
13931 if (SI1 != SelectTruncOp[1].
end())
13940 if (!ReallyNeedsExt)
13941 return N->getOperand(0);
13948 N->getValueSizeInBits(0), PromBits),
13949 dl,
N->getValueType(0)));
13952 "Invalid extension type");
13955 DAG.
getConstant(
N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13963 DAGCombinerInfo &DCI)
const {
13965 "Should be called with a SETCC node");
13967 ISD::CondCode CC = cast<CondCodeSDNode>(
N->getOperand(2))->get();
13983 EVT VT =
N->getValueType(0);
13984 EVT OpVT =
LHS.getValueType();
13990 return DAGCombineTruncBoolExt(
N, DCI);
14009 combineElementTruncationToVectorTruncation(
SDNode *
N,
14010 DAGCombinerInfo &DCI)
const {
14012 "Should be called with a BUILD_VECTOR node");
14017 SDValue FirstInput =
N->getOperand(0);
14019 "The input operand must be an fp-to-int conversion.");
14028 bool IsSplat =
true;
14033 EVT TargetVT =
N->getValueType(0);
14034 for (
int i = 0,
e =
N->getNumOperands();
i <
e; ++
i) {
14039 if (NextConversion != FirstConversion)
14047 if (
N->getOperand(
i) != FirstInput)
14058 for (
int i = 0,
e =
N->getNumOperands();
i <
e; ++
i) {
14064 Ops.push_back(DAG.
getUNDEF(SrcVT));
14069 Ops.push_back(Trunc);
14072 Ops.push_back(
In.isUndef() ? DAG.
getUNDEF(SrcVT) :
In.getOperand(0));
14097 "Should be called with a BUILD_VECTOR node");
14102 if (!
N->getValueType(0).getVectorElementType().isByteSized())
14105 bool InputsAreConsecutiveLoads =
true;
14106 bool InputsAreReverseConsecutive =
true;
14107 unsigned ElemSize =
N->getValueType(0).getScalarType().getStoreSize();
14108 SDValue FirstInput =
N->getOperand(0);
14109 bool IsRoundOfExtLoad =
false;
14118 N->getNumOperands() == 1)
14121 for (
int i = 1,
e =
N->getNumOperands();
i <
e; ++
i) {
14123 if (IsRoundOfExtLoad &&
N->getOperand(
i).getOpcode() !=
ISD::FP_ROUND)
14126 SDValue NextInput = IsRoundOfExtLoad ?
N->getOperand(
i).getOperand(0) :
14132 IsRoundOfExtLoad ?
N->getOperand(
i-1).getOperand(0) :
N->getOperand(
i-1);
14133 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
14134 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
14141 InputsAreConsecutiveLoads =
false;
14143 InputsAreReverseConsecutive =
false;
14146 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14150 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14151 "The loads cannot be both consecutive and reverse consecutive.");
14154 IsRoundOfExtLoad ? FirstInput.
getOperand(0) : FirstInput;
14156 IsRoundOfExtLoad ?
N->getOperand(
N->getNumOperands()-1).getOperand(0) :
14157 N->getOperand(
N->getNumOperands()-1);
14159 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
14161 if (InputsAreConsecutiveLoads) {
14162 assert(LD1 &&
"Input needs to be a LoadSDNode.");
14167 if (InputsAreReverseConsecutive) {
14168 assert(
LDL &&
"Input needs to be a LoadSDNode.");
14170 DAG.
getLoad(
N->getValueType(0), dl,
LDL->getChain(),
LDL->getBasePtr(),
14171 LDL->getPointerInfo(),
LDL->getAlign());
14173 for (
int i =
N->getNumOperands() - 1;
i >= 0;
i--)
14177 DAG.
getUNDEF(
N->getValueType(0)), Ops);
14190 unsigned NumElems = Input.getValueType().getVectorNumElements();
14196 for (
unsigned i = 0;
i <
N->getNumOperands();
i++) {
14198 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14200 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14201 CorrectElems = CorrectElems >> 8;
14202 Elems = Elems >> 8;
14207 DAG.
getUNDEF(Input.getValueType()), ShuffleMask);
14209 EVT VT =
N->getValueType(0);
14213 Input.getValueType().getVectorElementType(),
14247 auto isSExtOfVecExtract = [&](
SDValue Op) ->
bool {
14267 if (Input && Input != Extract.
getOperand(0))
14273 Elems = Elems << 8;
14282 for (
unsigned i = 0;
i <
N->getNumOperands();
i++) {
14283 if (!isSExtOfVecExtract(
N->getOperand(
i))) {
14290 int TgtElemArrayIdx;
14291 int InputSize = Input.getValueType().getScalarSizeInBits();
14292 int OutputSize =
N->getValueType(0).getScalarSizeInBits();
14293 if (InputSize + OutputSize == 40)
14294 TgtElemArrayIdx = 0;
14295 else if (InputSize + OutputSize == 72)
14296 TgtElemArrayIdx = 1;
14297 else if (InputSize + OutputSize == 48)
14298 TgtElemArrayIdx = 2;
14299 else if (InputSize + OutputSize == 80)
14300 TgtElemArrayIdx = 3;
14301 else if (InputSize + OutputSize == 96)
14302 TgtElemArrayIdx = 4;
14306 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14308 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14309 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14310 if (Elems != CorrectElems) {
14329 SDValue Operand =
N->getOperand(0);
14335 auto *
LD = cast<LoadSDNode>(Operand);
14344 if (!ValidLDType ||
14350 LD->getChain(),
LD->getBasePtr(),
14359 DAGCombinerInfo &DCI)
const {
14361 "Should be called with a BUILD_VECTOR node");
14366 if (!Subtarget.
hasVSX())
14372 SDValue FirstInput =
N->getOperand(0);
14374 SDValue Reduced = combineElementTruncationToVectorTruncation(
N, DCI);
14389 if (Subtarget.
hasP9Altivec() && !DCI.isBeforeLegalize()) {
14415 if (FirstInput.
getOpcode() !=
N->getOperand(1).getOpcode())
14419 SDValue Ext2 =
N->getOperand(1).getOperand(0);
14426 if (!Ext1Op || !Ext2Op)
14435 if (FirstElem == 0 && SecondElem == 1)
14437 else if (FirstElem == 2 && SecondElem == 3)
14450 DAGCombinerInfo &DCI)
const {
14453 "Need an int -> FP conversion node here");
14466 if (!
Op.getOperand(0).getValueType().isSimple())
14468 if (
Op.getOperand(0).getValueType().getSimpleVT() <=
MVT(
MVT::i1) ||
14469 Op.getOperand(0).getValueType().getSimpleVT() >
MVT(
MVT::i64))
14472 SDValue FirstOperand(
Op.getOperand(0));
14473 bool SubWordLoad = FirstOperand.getOpcode() ==
ISD::LOAD &&
14474 (FirstOperand.getValueType() ==
MVT::i8 ||
14475 FirstOperand.getValueType() ==
MVT::i16);
14478 bool DstDouble =
Op.getValueType() ==
MVT::f64;
14479 unsigned ConvOp =
Signed ?
14485 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14493 SDValue ExtOps[] = { Ld, WidthConst };
14505 if (
Op.getOperand(0).getValueType() ==
MVT::i32)
14509 "UINT_TO_FP is supported only with FPCVT");
14527 SDValue Src =
Op.getOperand(0).getOperand(0);
14528 if (Src.getValueType() ==
MVT::f32) {
14530 DCI.AddToWorklist(Src.getNode());
14531 }
else if (Src.getValueType() !=
MVT::f64) {
14546 DCI.AddToWorklist(
FP.getNode());
14570 switch (
N->getOpcode()) {
14575 Chain =
LD->getChain();
14576 Base =
LD->getBasePtr();
14577 MMO =
LD->getMemOperand();
14596 MVT VecTy =
N->getValueType(0).getSimpleVT();
14604 Chain =
Load.getValue(1);
14637 switch (
N->getOpcode()) {
14642 Chain =
ST->getChain();
14643 Base =
ST->getBasePtr();
14644 MMO =
ST->getMemOperand();
14664 SDValue Src =
N->getOperand(SrcOpnd);
14665 MVT VecTy = Src.getValueType().getSimpleVT();
14680 StoreOps, VecTy, MMO);
14687 DAGCombinerInfo &DCI)
const {
14691 unsigned Opcode =
N->getOperand(1).getOpcode();
14694 &&
"Not a FP_TO_INT Instruction!");
14696 SDValue Val =
N->getOperand(1).getOperand(0);
14697 EVT Op1VT =
N->getOperand(1).getValueType();
14704 bool ValidTypeForStoreFltAsInt =
14712 cast<StoreSDNode>(
N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14718 DCI.AddToWorklist(Val.
getNode());
14726 Val = DAG.
getNode(ConvOpcode,
14728 DCI.AddToWorklist(Val.
getNode());
14732 SDValue Ops[] = {
N->getOperand(0), Val,
N->getOperand(2),
14738 cast<StoreSDNode>(
N)->getMemoryVT(),
14739 cast<StoreSDNode>(
N)->getMemOperand());
14741 DCI.AddToWorklist(Val.
getNode());
14748 bool PrevElemFromFirstVec =
Mask[0] < NumElts;
14749 for (
int i = 1,
e =
Mask.size();
i <
e;
i++) {
14750 if (PrevElemFromFirstVec &&
Mask[
i] < NumElts)
14752 if (!PrevElemFromFirstVec &&
Mask[
i] >= NumElts)
14754 PrevElemFromFirstVec = !PrevElemFromFirstVec;
14765 for (
int i = 0,
e =
Op.getNumOperands();
i <
e;
i++) {
14766 FirstOp =
Op.getOperand(
i);
14772 for (
int i = 1,
e =
Op.getNumOperands();
i <
e;
i++)
14773 if (
Op.getOperand(
i) != FirstOp && !
Op.getOperand(
i).isUndef())
14783 Op =
Op.getOperand(0);
14798 int LHSMaxIdx,
int RHSMinIdx,
14799 int RHSMaxIdx,
int HalfVec,
14800 unsigned ValidLaneWidth,
14802 for (
int i = 0,
e = ShuffV.size();
i <
e;
i++) {
14803 int Idx = ShuffV[
i];
14804 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14806 Subtarget.
isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14817 SDLoc dl(OrigSToV);
14820 "Expecting a SCALAR_TO_VECTOR here");
14824 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14825 SDValue OrigVector = Input.getOperand(0);
14833 "Cannot produce a permuted scalar_to_vector for one element vector");
14835 unsigned ResultInElt = NumElts / 2;
14862 int NumElts =
LHS.getValueType().getVectorNumElements();
14882 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14891 if (SToVLHS || SToVRHS) {
14898 if (SToVLHS && SToVRHS &&
14905 int NumEltsOut = ShuffV.size();
14910 unsigned ValidLaneWidth =
14912 LHS.getValueType().getScalarSizeInBits()
14914 RHS.getValueType().getScalarSizeInBits();
14918 int LHSMaxIdx = -1;
14919 int RHSMinIdx = -1;
14920 int RHSMaxIdx = -1;
14921 int HalfVec =
LHS.getValueType().getVectorNumElements() / 2;
14933 LHSMaxIdx = NumEltsOut / NumEltsIn;
14942 RHSMinIdx = NumEltsOut;
14943 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14956 HalfVec, ValidLaneWidth, Subtarget);
14961 if (!isa<ShuffleVectorSDNode>(Res))
14963 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14982 if (IsLittleEndian) {
14985 if (
Mask[0] < NumElts)
14986 for (
int i = 1,
e =
Mask.size();
i <
e;
i += 2) {
14989 ShuffV[
i] = (ShuffV[
i - 1] + NumElts);
14994 for (
int i = 0,
e =
Mask.size();
i <
e;
i += 2) {
14997 ShuffV[
i] = (ShuffV[
i + 1] + NumElts);
15002 if (
Mask[0] < NumElts)
15003 for (
int i = 0,
e =
Mask.size();
i <
e;
i += 2) {
15006 ShuffV[
i] = ShuffV[
i + 1] - NumElts;
15011 for (
int i = 1,
e =
Mask.size();
i <
e;
i += 2) {
15014 ShuffV[
i] = ShuffV[
i - 1] - NumElts;
15021 cast<BuildVectorSDNode>(TheSplat.
getNode())->getSplatValue();
15024 if (IsLittleEndian)
15033 DAGCombinerInfo &DCI)
const {
15035 "Not a reverse memop pattern!");
15040 auto I =
Mask.rbegin();
15041 auto E =
Mask.rend();
15043 for (;
I !=
E; ++
I) {
15063 if(!IsElementReverse(SVN))
15104 switch (
N->getOpcode()) {
15107 return combineADD(
N, DCI);
15109 return combineSHL(
N, DCI);
15111 return combineSRA(
N, DCI);
15113 return combineSRL(
N, DCI);
15115 return combineMUL(
N, DCI);
15118 return combineFMALike(
N, DCI);
15121 return N->getOperand(0);
15125 return N->getOperand(0);
15131 return N->getOperand(0);
15137 return DAGCombineExtBoolTrunc(
N, DCI);
15139 return combineTRUNCATE(
N, DCI);
15141 if (
SDValue CSCC = combineSetCC(
N, DCI))
15145 return DAGCombineTruncBoolExt(
N, DCI);
15148 return combineFPToIntToFP(
N, DCI);
15151 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(
N->getOperand(0));
15152 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(
N), LSBase, DCI);
15154 return combineVectorShuffle(cast<ShuffleVectorSDNode>(
N), DCI.
DAG);
15157 EVT Op1VT =
N->getOperand(1).getValueType();
15158 unsigned Opcode =
N->getOperand(1).getOpcode();
15161 SDValue Val= combineStoreFPToInt(
N, DCI);
15168 SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(
N), DCI);
15175 N->getOperand(1).getNode()->hasOneUse() &&
15181 EVT mVT = cast<StoreSDNode>(
N)->getMemoryVT();
15185 SDValue BSwapOp =
N->getOperand(1).getOperand(0);
15192 if (Op1VT.
bitsGT(mVT)) {
15202 N->getOperand(0), BSwapOp,
N->getOperand(2), DAG.
getValueType(mVT)
15206 Ops, cast<StoreSDNode>(
N)->getMemoryVT(),
15207 cast<StoreSDNode>(
N)->getMemOperand());
15213 isa<ConstantSDNode>(
N->getOperand(1)) && Op1VT ==
MVT::i32) {
15215 EVT MemVT = cast<StoreSDNode>(
N)->getMemoryVT();
15225 cast<StoreSDNode>(
N)->setTruncatingStore(
true);
15242 EVT VT =
LD->getValueType(0);
15261 auto ReplaceTwoFloatLoad = [&]() {
15277 if (!
LD->hasNUsesOfValue(2, 0))
15280 auto UI =
LD->use_begin();
15281 while (UI.getUse().getResNo() != 0) ++UI;
15283 while (UI.getUse().getResNo() != 0) ++UI;
15284 SDNode *RightShift = *UI;
15292 if (RightShift->getOpcode() !=
ISD::SRL ||
15293 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15294 RightShift->getConstantOperandVal(1) != 32 ||
15295 !RightShift->hasOneUse())
15298 SDNode *Trunc2 = *RightShift->use_begin();
15321 if (
LD->isIndexed()) {
15323 "Non-pre-inc AM on PPC?");
15332 LD->getPointerInfo(),
LD->getAlign(),
15333 MMOFlags,
LD->getAAInfo());
15339 LD->getPointerInfo().getWithOffset(4),
15342 if (
LD->isIndexed()) {
15356 if (ReplaceTwoFloatLoad())
15359 EVT MemVT =
LD->getMemoryVT();
15368 LD->getAlign() < ABIAlignment) {
15399 MVT PermCntlTy, PermTy, LDTy;
15400 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15401 : Intrinsic::ppc_altivec_lvsl;
15402 IntrLD = Intrinsic::ppc_altivec_lvx;
15403 IntrPerm = Intrinsic::ppc_altivec_vperm;
15424 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15428 BaseLoadOps, LDTy, BaseMMO);
15437 int IncValue = IncOffset;
15454 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15458 ExtraLoadOps, LDTy, ExtraMMO);
15469 if (isLittleEndian)
15471 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15474 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15493 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
15495 : Intrinsic::ppc_altivec_lvsl);
15496 if (IID ==
Intr &&
N->getOperand(1)->getOpcode() ==
ISD::ADD) {
15503 .
zext(Add.getScalarValueSizeInBits()))) {
15504 SDNode *BasePtr = Add->getOperand(0).getNode();
15505 for (
SDNode *U : BasePtr->uses()) {
15507 cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15517 if (isa<ConstantSDNode>(Add->getOperand(1))) {
15518 SDNode *BasePtr = Add->getOperand(0).getNode();
15519 for (
SDNode *U : BasePtr->uses()) {
15521 isa<ConstantSDNode>(U->getOperand(1)) &&
15522 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15523 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15529 cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15542 (IID == Intrinsic::ppc_altivec_vmaxsw ||
15543 IID == Intrinsic::ppc_altivec_vmaxsh ||
15544 IID == Intrinsic::ppc_altivec_vmaxsb)) {
15560 V2.getOperand(1) == V1) {
15578 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
15581 case Intrinsic::ppc_vsx_lxvw4x:
15582 case Intrinsic::ppc_vsx_lxvd2x:
15591 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
15594 case Intrinsic::ppc_vsx_stxvw4x:
15595 case Intrinsic::ppc_vsx_stxvd2x:
15604 bool Is64BitBswapOn64BitTgt =
15607 N->getOperand(0).hasOneUse();
15608 if (IsSingleUseNormalLd &&
15610 (Subtarget.
hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15623 Ops,
LD->getMemoryVT(),
LD->getMemOperand());
15644 !IsSingleUseNormalLd)
15649 if (!
LD->isSimple())
15653 LD->getPointerInfo(),
LD->getAlign());
15658 LD->getMemOperand(), 4, 4);
15668 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15677 if (!
N->getOperand(0).hasOneUse() &&
15678 !
N->getOperand(1).hasOneUse() &&
15679 !
N->getOperand(2).hasOneUse()) {
15682 SDNode *VCMPrecNode =
nullptr;
15684 SDNode *LHSN =
N->getOperand(0).getNode();
15688 UI->getOperand(1) ==
N->getOperand(1) &&
15689 UI->getOperand(2) ==
N->getOperand(2) &&
15690 UI->getOperand(0) ==
N->getOperand(0)) {
15703 SDNode *FlagUser =
nullptr;
15705 FlagUser ==
nullptr; ++UI) {
15706 assert(UI != VCMPrecNode->
use_end() &&
"Didn't find user!");
15719 return SDValue(VCMPrecNode, 0);
15727 cast<ConstantSDNode>(
Cond.getOperand(1))->getZExtValue() ==
15728 Intrinsic::loop_decrement) {
15734 "Counter decrement has more than one use");
15746 ISD::CondCode CC = cast<CondCodeSDNode>(
N->getOperand(1))->get();
15753 cast<ConstantSDNode>(
LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15754 Intrinsic::loop_decrement &&
15755 isa<ConstantSDNode>(
LHS.getOperand(1)) &&
15757 LHS =
LHS.getOperand(0);
15760 cast<ConstantSDNode>(
LHS.getOperand(1))->getZExtValue() ==
15761 Intrinsic::loop_decrement &&
15762 isa<ConstantSDNode>(
RHS)) {
15764 "Counter decrement comparison is not EQ or NE");
15766 unsigned Val = cast<ConstantSDNode>(
RHS)->getZExtValue();
15774 "Counter decrement has more than one use");
15777 N->getOperand(0),
N->getOperand(4));
15786 assert(isDot &&
"Can't compare against a vector result!");
15790 unsigned Val = cast<ConstantSDNode>(
RHS)->getZExtValue();
15791 if (Val != 0 && Val != 1) {
15793 return N->getOperand(0);
15796 N->getOperand(0),
N->getOperand(4));
15799 bool BranchOnWhenPredTrue = (CC ==
ISD::SETEQ) ^ (Val == 0);
15812 switch (cast<ConstantSDNode>(
LHS.getOperand(1))->getZExtValue()) {
15831 N->getOperand(4), CompNode.
getValue(1));
15836 return DAGCombineBuildVector(
N, DCI);
15838 return combineABS(
N, DCI);
15840 return combineVSelect(
N, DCI);
15851 EVT VT =
N->getValueType(0);
15866 Created.push_back(
Op.getNode());
15870 Created.push_back(
Op.getNode());
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpequq_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      Known.Zero = ~1U; // All bits but the low one are known to be zero.
      break;
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
    default:
      break;
    case Intrinsic::ppc_load2r:
      // Top bits are cleared for load2r (which is the same as lhbrx).
      Known.Zero = 0xFFFF0000;
      break;
    }
    break;
  }
  }
}
      // (getPrefLoopAlignment: measure the loop body to choose an alignment)
      uint64_t LoopSize = 0;
      for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
        for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
          LoopSize += TII->getInstSizeInBytes(*J);
          if (LoopSize > 32)
            break;
        }

      if (LoopSize > 16 && LoopSize <= 32)
        return Align(32);
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    // ... ('b', 'r', 'f', 'd', 'v', 'y' are register-class constraints;
    //      'Z' is treated as a memory constraint) ...
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi" || Constraint == "ww") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match, but allow it at the
  // lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // just hold 64-bit integer data.
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "ww" && type->isFloatTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  // ...
  }
  return weight;
}
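// getRegForInlineAsmConstraint maps an inline-asm constraint to a
// (register, register class) pair. It handles the single-letter GCC
// constraints, the PPC-specific "w*" constraints, and explicit physical
// registers such as "{vs34}" or "{f2}" that the generic lookup cannot
// resolve. Illustrative use from C (hypothetical snippet, not from this
// file):
//   __asm__("xvadddp %x0, %x1, %x2" : "=wa"(res) : "wa"(a), "wa"(b));
// Each "wa" operand resolves to the VSRC register class here.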
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b': // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r': // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same class.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
      }
      break;
    case 'v':
      if (Subtarget.hasAltivec() && VT.isVector())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      else if (Subtarget.hasVSX())
        // Scalars in AltiVec registers only make sense with VSX.
        return std::make_pair(0U, &PPC::VFRCRegClass);
      break;
    case 'y': // crrc (CR fields)
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    // A VSX register for either a scalar (FP) or vector. There is no
    // support for single precision scalars on subtargets prior to Power8.
    if (VT.isVector())
      return std::make_pair(0U, &PPC::VSRCRegClass);
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if (Constraint == "lr") {
    if (VT == MVT::i64)
      return std::make_pair(0U, &PPC::LR8RCRegClass);
    return std::make_pair(0U, &PPC::LRRCRegClass);
  }

  // Handle special cases of physical registers that are not properly handled
  // by the base class.
  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
    // If we name a VSX register, we can't defer to the base class because it
    // will not recognize the correct register (their names will be VSL{0-31}
    // and V{0-31} so they won't match). So we match them here.
    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      int VSNum = atoi(Constraint.data() + 3);
      assert(VSNum >= 0 && VSNum <= 63 &&
             "Attempted to access a vsr out of range");
      if (VSNum < 32)
        return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
      return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
    }

    // For float registers, we can't defer to the base class as it will match
    // the SPILLTOVSRRC class.
    if (Constraint.size() > 3 && Constraint[1] == 'f') {
      int RegNum = atoi(Constraint.data() + 2);
      if (RegNum > 31 || RegNum < 0)
        report_fatal_error("Invalid floating point register number");
      if (VT == MVT::f32 || VT == MVT::i32)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
    }
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit
  // registers (which we call X[0-9]+). If a 64-bit value has been requested
  // and a 32-bit GPR has been selected, 'upgrade' it to the 64-bit parent.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 32 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
  }
  return R;
}
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length-1 constraints for now.
  if (Constraint.length() > 1)
    return;

  char Letter = Constraint[0];
  switch (Letter) {
  // ...
  case 'J': // "J": unsigned 16-bit constant shifted left 16 bits.
    if (isShiftedUInt<16, 16>(Value))
      Result = DAG.getTargetConstant(Value, dl, Op.getValueType());
    break;
  case 'L': // "L": signed 16-bit constant shifted left 16 bits.
    if (isShiftedInt<16, 16>(Value))
      Result = DAG.getTargetConstant(Value, dl, Op.getValueType());
    break;
  // ...
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
  // (isLegalAddressingMode: only the scales PPC can fold are accepted)
  switch (AM.Scale) {
  // ...
  }

  // (LowerRETURNADDR)
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  // ...
  bool isPPC64 = Subtarget.isPPC64();
  // ...
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  // ...

  // (LowerFRAMEADDR)
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  // ...
  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
  // ...

  // (getRegisterByName)
  bool isPPC64 = Subtarget.isPPC64();
  // ...

  // (isOffsetFoldingLegal)
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return false;
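// getTgtMemIntrinsic describes the memory behavior of PPC intrinsics so the
// DAG builder can attach MachineMemOperands: the quadword atomics are i128
// accesses at align 16; the AltiVec/VSX loads and stores report a
// conservative byte range (offset -(size-1), extent 2*size-1) because
// lvx/stvx implicitly round the address down to a 16-byte boundary.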
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_atomicrmw_xchg_i128:
  case Intrinsic::ppc_atomicrmw_add_i128:
  case Intrinsic::ppc_atomicrmw_sub_i128:
  case Intrinsic::ppc_atomicrmw_nand_i128:
  case Intrinsic::ppc_atomicrmw_and_i128:
  case Intrinsic::ppc_atomicrmw_or_i128:
  case Intrinsic::ppc_atomicrmw_xor_i128:
  case Intrinsic::ppc_cmpxchg_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::ppc_atomic_load_i128:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::ppc_atomic_store_i128:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::i128;
    Info.ptrVal = I.getArgOperand(2);
    Info.offset = 0;
    Info.align = Align(16);
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
    return true;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize() + 1;
    Info.size = 2 * VT.getStoreSize() - 1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize() + 1;
    Info.size = 2 * VT.getStoreSize() - 1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  default:
    break;
  }

  return false;
}
// (shouldConvertConstantLoadToIntImm: constants of up to 64 bits are cheaper
//  to materialize in registers than to load from the constant pool)
  return !(BitSize == 0 || BitSize > 64);

// (isTruncateFree, Type* and EVT overloads: only i64 -> i32 is free)
  return NumBits1 == 64 && NumBits2 == 32;
  // ...
  return NumBits1 == 64 && NumBits2 == 32;

// (isZExtFree: zero extensions of narrow loads are free)
  EVT MemVT = LD->getMemoryVT();
  // ...

// (isFPExtFree)
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
// (allowsMisalignedMemoryAccesses(..., bool *Fast) const)
  // ...

// (isFMAFasterThanFMulAndFAdd)
  if (Subtarget.hasVSX()) {
    // ...
  }

// (decomposeMulByConstant)
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    if (!ConstNode->getAPIntValue().isSignedIntN(64))
      return false;
    // ...
    int64_t Imm = ConstNode->getSExtValue();
    unsigned Shift = countTrailingZeros<uint64_t>(Imm);
    // ...
  }

// (isProfitableToHoist: don't hoist an FMul whose single user will fuse it
//  into an FMA)
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA; PowerPC prefers FMA.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;
    // ...
  }
const MCPhysReg *PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-saved register, but we must treat it as clobbered by any
  // call site; the same reasoning applies to CTR, which is used by any
  // indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

Register PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}

Register PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}

bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
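// getNegatedExpression folds fneg into PPCISD::FNMSUB:
//   (fneg (fnmsub N0, N1, N2)) -> (fnmsub (fneg N0), N1, (fneg N2))
//                              or (fnmsub N0, (fneg N1), (fneg N2))
// choosing whichever multiplicand is cheaper to negate by comparing
// NegatibleCost at Depth + 1 (only when signed zeros can be ignored).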
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB: {
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    SDValue N2 = Op.getOperand(2);
    SDLoc Loc(Op);
    // ... (NegN2/N2Cost are computed first; bail out if N2 can't be negated)

    NegatibleCost N0Cost = NegatibleCost::Expensive;
    SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
                                         N0Cost, Depth + 1);

    NegatibleCost N1Cost = NegatibleCost::Expensive;
    SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
                                         N1Cost, Depth + 1);

    if (NegN0 && N0Cost <= N1Cost) {
      Cost = std::min(N0Cost, N2Cost);
      return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
    } else if (NegN1) {
      Cost = std::min(N1Cost, N2Cost);
      return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
    }
    break;
  }
  }
  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
// (isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize): +0.0 is the
//  common cheap case; prefixed instructions can splat other immediates)
    return Imm.isPosZero();

// (shift combine helper: a shift amount masked with (bitwidth - 1) is
//  already modulo the type size, so the AND can be dropped when lowering
//  to the target shift node)
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;
  // ...
  if (Mask->getZExtValue() == OpSizeInBits - 1)
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  // ...
}

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  // ...
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  // ...
}
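// combineADDToADDZE rewrites (add X, (zext (setcc Z, C, cc))) into carry
// arithmetic so no GPR has to materialize the boolean; e.g. for SETNE
// against zero:
//   add X, (zext (setne Z, 0)) --> (addze X, (addic Z, -1)).carry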
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction, -C must be in
      // [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  // ...
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch (cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    // ...
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // ...
  }
  case ISD::SETEQ: {
    // ...
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // ...
  }
  }
  // ...

// (combineADDToMAT_PCREL_ADDR: fold global + constant offset into a single
//  PPCISD::MAT_PCREL_ADDR node under PC-relative addressing)
  if (!GSDN || !ConstNode)
    return SDValue();
  // ...
  if (!isInt<34>(NewOffset))
    return SDValue();
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (SDValue Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
    return Value;

  if (SDValue Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
    return Value;

  return SDValue();
}

SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  SDLoc dl(N);
  if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
    return CRTruncValue;

  // ...
  EVT VT = N->getValueType(0);
  // ... (the abs-of-sub fold builds a VABSD node whose last operand is
  //      DCI.DAG.getTargetConstant(0, dl, MVT::i32)) ...

  // Truncating i128 to i64: extract the correct 64-bit element instead.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
  // ... (an srl by 64 selects the other element) ...
  EltToExtract = EltToExtract ? 0 : 1;
  // ...
  return DCI.DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, /* v2i64 bitcast */ Bitcast,
      DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
}
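// combineMUL turns a multiply by a near-power-of-two constant into a
// shift-and-add/sub sequence when profitable for the CPU directive:
//   (mul x, 2^N + 1)    => (add (shl x, N), x)
//   (mul x, -(2^N + 1)) => -(add (shl x, N), x)
//   (mul x, 2^N - 1)    => (sub (shl x, N), x)
//   (mul x, -(2^N - 1)) => (sub x, (shl x, N))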
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // ...
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      // mul/add/shl latencies make the expansion always profitable here.
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR_FUTURE:
      // ...
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();
    // ...
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();
    // ...
  }
  return SDValue();
}
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Detect FNMSUB patterns.
  // ...
  EVT VT = N->getValueType(0);
  // ...
  unsigned Opc = N->getOpcode();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  // ...
}

bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // ...
  // Can't tail call a function called indirectly, or if it has variadic
  // arguments.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())
    return false;
  // ...
}

bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;
  // ...
}

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  // ...
  if (CI->getBitWidth() > 64)
    return false;
  int64_t ConstVal = CI->getZExtValue();
  // The mask must fit a single andi. or andis. immediate.
  return isUInt<16>(ConstVal) ||
         (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
}
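// combineABS: on Power9 AltiVec, (abs (sub a, b)) becomes the vector
// absolute-difference node:
//   (abs (sub (zext a), (zext b))) -> (VABSD (zext a), (zext b), 0)
// For v4i32 the combine also applies to arbitrary operands, with the last
// operand set to 1 so instruction selection compensates for possibly-set
// sign bits.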
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, if it's known to be positive (as signed
    // integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::SIGN_EXTEND) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::SIGN_EXTEND)) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For v4i32, VABSD still applies; the trailing 1 tells selection to
    // account for possibly-set sign bits.
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();
  // ... (match vselect(setcc(a, b), sub(a, b), sub(b, a)) shapes) ...
  return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                     CmpOpnd1, CmpOpnd2,
                     DAG.getTargetConstant(0, dl, MVT::i32));
}
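// Addressing-mode selection: each memory access is summarized as a bitset
// of PPC::MOF_* flags (operand type, subtarget features, immediate value
// and alignment). getAddrModeForFlags matches that bitset against the
// per-mode flag sets registered in initializeAddrModeMap() to choose among
// D-Form, DS-Form, DQ-Form, prefixed D-Form, and X-Form.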
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
  // ...
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DSForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DQForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_PrefixDForm;
  // If no other forms match, X-Form is the most general mode.
  return PPC::AM_XForm;
}

// (frame-index alignment: clear or set the multiple-of-4/16 flags based on
//  the FI's known alignment)
  if ((FrameIndexAlign % 4) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) != 0)
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
  // For a plain frame index, set the alignment flags from the FI alignment.
  if ((FrameIndexAlign % 4) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult4;
  if ((FrameIndexAlign % 16) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult16;

// (computeFlagsForAddressComputation: DS-Form needs an immediate that is a
//  multiple of 4, DQ-Form a multiple of 16)
  if ((Imm & 0x3) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult4;
  if ((Imm & 0xf) == 0)
    FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  // ...
  const APInt &ConstImm = CN->getAPIntValue();
  // ...
  const APInt &ConstImm = CN->getAPIntValue();
  // ...
      !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
  // ...

// (isPCRelNode)
  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
          isValidPCRelNode<ConstantPoolSDNode>(N) ||
          isValidPCRelNode<GlobalAddressSDNode>(N) ||
          isValidPCRelNode<JumpTableSDNode>(N) ||
          isValidPCRelNode<BlockAddressSDNode>(N));
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;
  // ... (subtarget feature flags) ...

  // The paired load/store intrinsics only need address-computation flags.
  unsigned ParentOp = Parent->getOpcode();
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
                               (ParentOp == ISD::INTRINSIC_VOID))) {
    unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
                             ? Parent->getOperand(2)
                             : Parent->getOperand(3);
      computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
      FlagSet |= PPC::MOF_Vector;
      return FlagSet;
    }
  }

  // Don't handle atomic or pre-increment accesses here.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  // Compute in-memory type flags (scalar int, float, or vector).
  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
  assert(MN && "Parent should be a MemSDNode!");
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
    if (Size < 32)
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
    else
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) {
    if (Size == 128)
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256) {
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
             "enabled!");
      FlagSet |= PPC::MOF_Vector256;
    } else
      llvm_unreachable("Not expecting illegal vectors!");
  } else { // Floating-point types: scalar, f128, or vector.
    if (Size == 32 || Size == 64)
      FlagSet |= PPC::MOF_ScalarFloat;
    // ...
  }

  // Compute extension and constant flags.
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
    // ...
  }
  // ...
  bool IsNonP1034BitConst = /* a constant wider than 34 bits on non-P10 */;
  if (/* N is neither an add/or nor a target constant, or */
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;

  return FlagSet;
}
// (SelectForceXFormMode: force X-Form, reg+reg, addressing)
  PPC::AddrMode Mode = PPC::AM_XForm;
  int16_t ForceXFormImm = 0;
  if (provablyDisjointOr(DAG, N) &&
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // If the address is the sum of a value and a constant that does not fit
  // an s16 immediate, keep both addends in registers instead of
  // materializing the constant separately.
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }
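// Scalar MASS lowering: when isScalarMASSConversionEnabled() reports that
// scalar MASS entries were requested and the node's fast-math flags allow
// approximate functions, pow/sin/cos/log/log10/exp calls are redirected to
// the IBM MASS entry points ("__xl_pow", "__xl_powf", ...), preferring the
// "_finite" variants when no-NaNs and no-infs are also guaranteed.
// lowerToLibCall below does the work: it marshals the node's operands into
// an ArgListTy, emits the call via LowerCallTo, and marks it as a tail call
// when the node sits in tail position.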
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  TargetLowering::CallLoweringInfo CLI(DAG);
  EVT RetVT = Op.getValueType();
  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(LibCallName, getPointerTy(DAG.getDataLayout()));
  bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, false);
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (const SDValue &N : Op->op_values()) {
    EVT ArgVT = N.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Node = N;
    Entry.Ty = ArgTy;
    Entry.IsSExt = shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
    Entry.IsZExt = !Entry.IsSExt;
    Args.push_back(Entry);
  }

  SDValue InChain = DAG.getEntryNode();
  SDValue TCChain = InChain;
  const Function &F = DAG.getMachineFunction().getFunction();
  bool isTailCall =
      isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
      (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
  if (isTailCall)
    InChain = TCChain;
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setTailCall(isTailCall)
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend)
      .setIsPostTypeLegalization(true);
  return LowerCallTo(CLI).second;
}

SDValue PPCTargetLowering::lowerLibCallBasedOnType(
    const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
    SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::f32)
    return lowerToLibCall(LibCallFloatName, Op, DAG);

  if (Op.getValueType() == MVT::f64)
    return lowerToLibCall(LibCallDoubleName, Op, DAG);

  return SDValue();
}

bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
  return isLowringToMASSSafe(Op) && Op.getNode()->getFlags().hasNoInfs() &&
         Op.getNode()->getFlags().hasNoNaNs();
}

bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
  return Op.getNode()->getFlags().hasApproximateFuncs();
}

bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
  return getTargetMachine().Options.PPCGenScalarMASSEntries;
}

SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
                                            const char *LibCallFloatName,
                                            const char *LibCallDoubleNameFinite,
                                            const char *LibCallFloatNameFinite,
                                            SDValue Op,
                                            SelectionDAG &DAG) const {
  if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
    return SDValue();

  if (!isLowringToMASSFiniteSafe(Op))
    return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
                                   DAG);

  return lowerLibCallBasedOnType(LibCallFloatNameFinite,
                                 LibCallDoubleNameFinite, Op, DAG);
}
SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
                          "__xl_powf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
                          "__xl_sinf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
                          "__xl_cosf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
                          "__xl_logf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
                          "__xl_log10f_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
                          "__xl_expf_finite", Op, DAG);
}
// (fall back to X-Form when a DS-/DQ-Form would be used on a frame index
//  that is not sufficiently aligned)
  if (!isa<FrameIndexSDNode>(N))
    return;
  // ...

// (SelectOptimalAddrMode)
  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);
  // ...
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
    assert(Subtarget.isUsingPCRelativeCalls() &&
           "Must be using PC-Relative calls when a valid PC-Relative node is "
           "present!");
    Mode = PPC::AM_PCRel;
  }

  switch (Mode) {
  case PPC::AM_DForm:
    // ... (register plus 16-bit signed immediate) ...
      int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
    // ...
      Disp = N.getOperand(1).getOperand(0);
    // ...
      Base = N.getOperand(0);
    // ...
    // A 32-bit constant address can be materialized directly.
    if (isa<ConstantSDNode>(N)) {
      auto *CN = cast<ConstantSDNode>(N);
      EVT CNType = CN->getValueType(0);
      uint64_t CNImm = CN->getZExtValue();
      // ...
      int32_t Addr = (int32_t)CNImm;
      // ...
    }
    break;
  default: // X-Form is always available to be selected.
    // ...
    unsigned Opcode = N.getOpcode();
    // ...
      Base = N.getOperand(0);
    // ...
      Base = FI ? N : N.getOperand(1);
    // ...
  }
// ... (elided: a calling-convention helper whose trailing parameter is
//      `bool IsVarArg`) ...

bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
  return Subtarget.isPPC64() &&
         Subtarget.hasQuadwordAtomics() && EnableQuadwordAtomics;
}

static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected atomic operation");
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}
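// emitMaskedAtomicRMWIntrinsic / emitMaskedAtomicCmpXchgIntrinsic expand
// 128-bit atomics into calls to the ppc_atomicrmw_*_i128 / ppc_cmpxchg_i128
// intrinsics: the i128 operands are split into lo/hi i64 halves with
// trunc / lshr 64, and the {lo, hi} result pair is reassembled with
// zext / shl 64 / or. Illustrative IR for an i128 atomic add (hand-written
// sketch, not emitted verbatim):
//   %incr_lo = trunc i128 %incr to i64
//   %shr     = lshr i128 %incr, 64
//   %incr_hi = trunc i128 %shr to i64
//   %lohi    = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(
//                  ptr %addr, i64 %incr_lo, i64 %incr_hi)
//   %lo      = extractvalue { i64, i64 } %lohi, 0
//   %hi      = extractvalue { i64, i64 } %lohi, 1
//   %lo64    = zext i64 %lo to i128
//   %hi64    = zext i64 %hi to i128
//   %hi_sh   = shl i128 %hi64, 64
//   %val64   = or i128 %lo64, %hi_sh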
  // (in emitMaskedAtomicRMWIntrinsic)
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  // ...
  Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
  // (in emitMaskedAtomicCmpXchgIntrinsic)
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  // ...
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}